25 #ifndef STA_FIELD_CLASS_CUDA_H
26 #define STA_FIELD_CLASS_CUDA_H
29 #include "stensorcuda.h"
30 #include "cuda_runtime.h"
70 stafieldGPU(
const stafieldGPU & field) : _stafield<T>(), data(NULL)
78 stafieldGPU(
const std::size_t
shape[],
82 : _stafield<T>(), data(NULL)
88 throw STAError(
"L must be >= 0");
91 this->shape[0]=shape[0];
92 this->shape[1]=shape[1];
93 this->shape[2]=shape[2];
// Voxel count is kept in std::size_t: the shape entries are std::size_t, so
// storing their product in an int would truncate it for large volumes and let
// numcomponents*numVoxel overflow before it is widened by sizeof().
96 std::size_t numVoxel=shape[0]*shape[1]*shape[2];
97 if (hanalysis::verbose>0)
98 printf(
// shape[] holds std::size_t values; they are cast so the arguments match the
// %lu conversions (passing std::size_t to %i is a format mismatch).
"L: %d , (%lu,%lu,%lu) , // %i\n",L,(unsigned long)shape[0],(unsigned long)shape[1],(unsigned long)shape[2],numcomponents);
99 if (hanalysis::verbose>0)
100 printf(
// The byte count is a std::size_t expression; cast it to match %lu.
"allocating %lu bytes\n",(unsigned long)(numcomponents*numVoxel*
sizeof(std::complex<T>)));
// Allocate 2*sizeof(T) bytes per component and voxel through the project's
// gpu_malloc helper.
102 hanalysis_cuda::gpu_malloc<T>(this->data,numcomponents*numVoxel*2*
sizeof(T));
129 this->shape[0]=shape[0];
130 this->shape[1]=shape[1];
131 this->shape[2]=shape[2];
133 this->own_memory=
false;
145 if ((this->L==-1)&&(f.
L==-1))
148 if ((f.object_is_dead_soon==1)
152 if (f.object_is_dead_soon>1)
153 throw STAError(
"error: something went wrong with the memory managemant \n");
// Release the buffer currently owned by this object. This class allocates
// `data` through hanalysis_cuda::gpu_malloc and releases it with cudaFree in
// its other release paths, so the pointer must not be handed to delete[].
155 if (this->own_memory && (this->data!=NULL) && (this->object_is_dead_soon<2))
156 cudaFree(this->data);
164 this->shape[0]=f.
shape[0];
165 this->shape[1]=f.
shape[1];
166 this->shape[2]=f.
shape[2];
168 this->setElementSize(f.getElementSize());
172 this->stride=f.stride;
173 this->own_memory=f.own_memory;
183 throw STAError(
"warning: operator= (stride!=0) shared memory block but alocating new (own) memory would be nrequired \n");
185 if (!this->own_memory)
186 throw STAError(
"warning: operator= (!own_memory): shared memory block but alocating new (own) memory would be nrequired \n");
190 if (this->own_memory && (this->data!=NULL))
191 cudaFree(this->data);
195 this->shape[0]=f.
shape[0];
196 this->shape[1]=f.
shape[1];
197 this->shape[2]=f.
shape[2];
199 this->setElementSize(f.getElementSize());
203 this->own_memory=
true;
206 int numVoxel=this->shape[0]*this->shape[1]*this->shape[2];
207 hanalysis_cuda::gpu_malloc(this->data,numcomponents*numVoxel*
sizeof(std::complex<T>));
211 if ((f.stride==0)&&(this->stride==0))
214 this->setElementSize(f.getElementSize());
215 hanalysis_cuda::gpu_memcpy_d2d(f.data,this->data,this->shape[0]*this->shape[1]*this->shape[2]*numcomponents*
sizeof(std::complex<T>));
219 this->setElementSize(f.getElementSize());
221 throw STAError(
"BullShit");
225 throw STAError(
"operator= this cannot happen ! the input field must be STA_OFIELD_SINGLE");
227 int numcomponents_new=this->L+1;
229 numcomponents_new=2*this->L+1;
230 numcomponents_new*=2;
235 hanalysis_cuda::copySubfield2Subfield(f.data,
251 stafield<T> gpu2cpu()
const
256 this->set_death(&result);
266 stafieldGPU & operator=(
const stafield<T> & f)
273 if ((this->L==-1)&&(f.getRank()==-1))
275 if (!f.oneBlockMem())
276 throw (
hanalysis::STAError(
"error copying host memory to device memory, the host memory must be alignd in one single block!"));
281 throw STAError(
"warning: operator= (stride!=0) shared memory block but alocating new (own) memory would be nrequired \n");
283 if (!this->own_memory)
284 throw STAError(
"warning: operator= (!own_memory): shared memory block but alocating new (own) memory would be nrequired \n");
286 if (this->own_memory && (this->data!=NULL))
287 cudaFree(this->data);
291 this->shape[0]=f.getShape()[0];
292 this->shape[1]=f.getShape()[1];
293 this->shape[2]=f.getShape()[2];
295 this->setElementSize(f.getElementSize());
299 this->own_memory=
true;
302 hanalysis_cuda::gpu_malloc(this->data,numcomponents*this->getNumVoxel()*
sizeof(std::complex<T>));
308 this->setElementSize(f.getElementSize());
309 T *
const src=( T*
const)(f.getDataConst());
310 hanalysis_cuda::gpu_memcpy_h2d(src,this->data,this->getNumVoxel()*numcomponents*
sizeof(std::complex<T>));
312 throw (
hanalysis::STAError(
"error copying host memory to device memory, the (existing) device memory must be alignd in one single block!"));
349 component_data=this->data+2*offset;
356 this->set_death(&view);
357 view.stride=numcomponents;
359 view.setElementSize(this->getElementSize());
384 if (this->own_memory && (this->data!=NULL))
387 printf(
"destroying stafieldGPU %i / remaining: %i [own]",this->classcount_id,--classcount);
389 if (this->object_is_dead_soon<2)
392 printf(
" (deleting data)\n");
394 if (hanalysis::verbose>0)
395 printf(
"field destrucor -> deleting data\n");
396 cudaFree(this->data);
400 printf(
" (not deleting data, still having references)\n");
406 printf(
"destroying stafieldGPU %i / remaining: %i [empty]\n",this->classcount_id,--classcount);
408 printf(
"destroying stafieldGPU %i / remaining: %i [view]\n",this->classcount_id,--classcount);
410 if (hanalysis::verbose>0)
411 printf(
"field destrucor -> --\n");
418 static void Prod(
const stafieldGPU & stIn1,
419 const stafieldGPU & stIn2,
422 bool normalize=
false,
423 std::complex<T> alpha= T( 1 ),
424 bool clear_result =
false)
426 if ( ( std::abs ( stIn1.getRank()-stIn2.getRank() ) >J ) ||
427 ( J>std::abs ( stIn1.getRank()+stIn2.getRank() ) ) )
429 if ( ( ( stIn1.getRank()+stIn2.getRank()+J ) %2!=0 ) && ( normalize ) )
432 if (stOut.getRank()!=J)
436 if ((!stafieldGPU::equalShape(stIn1,stOut))||(!stafieldGPU::equalShape(stIn2,stOut)))
438 if ((stIn1.getStorage()!=stOut.getStorage())||(stIn2.getStorage()!=stOut.getStorage()))
442 if (stIn1.getType()!=stIn2.getType())
444 if (stOut.getType()!=stIn1.getType())
447 int stride_in1 = stIn1.getStride();
448 int stride_in2 = stIn2.getStride();
449 int stride_out = stOut.getStride();
451 if (hanalysis::verbose>0)
452 printf(
"Prod: stride_in1: %i,stride_in2: %i stride_out %i\n",stride_in1,stride_in2,stride_out);
455 hanalysis_cuda::sta_product(
456 stIn1.getDataConst(),
457 stIn2.getDataConst(),
497 static void Deriv(
const stafieldGPU & stIn,
500 bool conjugate=
false,
501 std::complex<T> alpha= T( 1 ),
502 bool clear_result =
false,
506 if (!stafieldGPU::equalShape(stIn,stOut))
508 if (stIn.getStorage()!=stOut.getStorage())
512 if (stOut.getType()!=stIn.getType())
514 if (stOut.getRank()!=stIn.getRank()+Jupdown)
517 int stride_in = stIn.getStride();
518 int stride_out = stOut.getStride();
520 if (hanalysis::verbose>0)
521 printf(
"Deriv: stride_in: %i stride_out %i\n",stride_in,stride_out);
524 hanalysis_cuda::sta_derivatives(
533 stIn.getElementSize(),
564 static void FFT(
const stafieldGPU & stIn,
567 bool conjugate=
false,
570 if ((!stafieldGPU::equalShape(stIn,stOut))||
571 (stIn.field_type!=stOut.field_type)||
575 (stIn.field_storage!=stOut.field_storage)))
578 std::size_t ncomponents_in=hanalysis::order2numComponents(stIn.getStorage(),stIn.getType(),stIn.L);
579 std::size_t ncomponents_out=hanalysis::order2numComponents(stOut.getStorage(),stOut.getType(),stOut.L);
580 if (((stIn.stride!=0)&&(ncomponents_in!=stIn.stride))||((stOut.stride!=0)&&(ncomponents_out!=stOut.stride)))
582 if ((stIn.data==stOut.data))
592 stOut.field_storage=new_field_storage;
600 int stride_in = stIn.getStride();
601 int stride_out = stOut.getStride();
602 int ncomponents=hanalysis::order2numComponents(stIn.getStorage(),stIn.getType(),stIn.L);
604 if (hanalysis::verbose>0)
605 printf(
"FFT: stride_in: %i stride_out %i , ncomp: %i\n",stride_in,stride_out,ncomponents);
607 hanalysis_cuda::sta_fft(
623 static void Lap(
const stafieldGPU & stIn,
626 bool clear_result =
false)
630 if (stIn.getStorage()!=stOut.getStorage())
632 int stride_in = stIn.getStride();
633 int stride_out = stOut.getStride();
634 int ncomponents=hanalysis::order2numComponents(stIn.getStorage(),stIn.getType(),stIn.L);
637 if (hanalysis::verbose>0)
638 printf(
"Lap: stride_in: %i stride_out %i , ncomp: %i\n",stride_in,stride_out,ncomponents);
642 hanalysis_cuda::sta_laplace (
667 this->set_death(&result);
668 Lap(*
this,result,alpha,
true);
680 bool conjugate=
false,
681 T alpha= T( 1 ))
const
686 this->set_death(&result);
687 FFT(*
this,result,forward,conjugate,alpha);
699 bool conjugate=
false,
700 std::complex<T> alpha= T( 1 ),
707 this->set_death(&result);
708 Deriv(*
this,result,J,conjugate,alpha,
true,accuracy);
723 bool normalize=
false,
724 std::complex<T> alpha= T( 1 ))
const
729 this->set_death(&result);
732 Prod(*
this,b,result,J,normalize,alpha,
true);
STA_FIELD_STORAGE
tensor field data storage
Definition: stensor.h:5163
tensor field has all components of even ranks :
Definition: stensor.h:5184
hanalysis::STA_FIELD_STORAGE field_storage
must be either STA_FIELD_STORAGE_C, STA_FIELD_STORAGE_R or STA_FIELD_STORAGE_RF
Definition: stafield.h:78
the STA error class
Definition: sta_error.h:68
stafieldGPU(const std::size_t shape[], int L, hanalysis::STA_FIELD_STORAGE field_storage, hanalysis::STA_FIELD_TYPE field_type, T *data)
Definition: stafield_cuda.h:114
represents spherical tensor fields (GPU version)
Definition: stafield_cuda.h:44
tensor field has one single component of rank :
Definition: stensor.h:5180
STA_FIELD_TYPE
tensor field data interpretations according to certain symmetries
Definition: stensor.h:5177
int L
tensor rank
Definition: stafield.h:82
std::size_t shape[3]
image shape
Definition: stafield.h:74
stafieldGPU fft(bool forward, bool conjugate=false, T alpha=T(1)) const
see FFT
Definition: stafield_cuda.h:679
stafieldGPU lap(T alpha=T(1)) const
see Lap
Definition: stafield_cuda.h:661
stafieldGPU deriv(int J, bool conjugate=false, std::complex< T > alpha=T(1), int accuracy=0) const
see Deriv
Definition: stafield_cuda.h:698
Definition: stensor.h:5173
hanalysis::STA_FIELD_TYPE field_type
must be either STA_OFIELD_SINGLE, STA_OFIELD_FULL, STA_OFIELD_EVEN or STA_OFIELD_ODD ...
Definition: stafield.h:80
Definition: stensor.h:5167
const T * getDataConst() const
Definition: stafield_cuda.h:62
The STA-ImageAnalysisToolkit namespace.
Definition: stafield.h:55
tensor field has all components of odd ranks :
Definition: stensor.h:5186
T * getData()
Definition: stafield_cuda.h:55
stafieldGPU prod(const stafieldGPU &b, int J, bool normalize=false, std::complex< T > alpha=T(1)) const
see Prod
Definition: stafield_cuda.h:721
stafieldGPU operator[](int l) const
Definition: stafield_cuda.h:368
represents spherical tensor fields
Definition: stafield.h:62
Definition: stensor.h:5170