41 #ifndef PCL_GPU_PEOPLE__NCV_HPP_ 42 #define PCL_GPU_PEOPLE__NCV_HPP_ 44 #if (defined WIN32 || defined _WIN32 || defined WINCE) && defined CVAPI_EXPORTS 45 #define NCV_EXPORTS __declspec(dllexport) 51 #define WIN32_LEAN_AND_MEAN 54 #include <cuda_runtime.h> 57 #include <pcl/console/print.h> 80 #define NCV_CT_PREP_PASTE_AUX(a,b) a##b 81 #define NCV_CT_PREP_PASTE(a,b) NCV_CT_PREP_PASTE_AUX(a, b)
86 #define NCV_CT_ASSERT(X) \ 87 typedef NcvCTprep::assertTest<sizeof(NcvCTprep::CT_ASSERT_FAILURE< (bool)(X) >)> \ 88 NCV_CT_PREP_PASTE(__ct_assert_typedef_, __LINE__) 96 #if !defined(__align__) && !defined(__CUDACC__) 97 #if defined(_WIN32) || defined(_WIN64) 98 #define __align__(n) __declspec(align(n)) 99 #elif defined(__unix__) 100 #define __align__(n) __attribute__((__aligned__(n))) 110 typedef bool NcvBool;
111 typedef long long Ncv64s;
113 #if defined(__APPLE__) && !defined(__CUDACC__) 114 typedef uint64_t Ncv64u;
116 typedef unsigned long long Ncv64u;
120 typedef unsigned int Ncv32u;
121 typedef short Ncv16s;
122 typedef unsigned short Ncv16u;
124 typedef unsigned char Ncv8u;
125 typedef float Ncv32f;
126 typedef double Ncv64f;
134 __host__ __device__
NcvRect8u() : x(0), y(0), width(0), height(0) {};
135 __host__ __device__
NcvRect8u(Ncv8u x, Ncv8u y, Ncv8u width, Ncv8u height) : x(x), y(y), width(width), height(height) {}
144 __host__ __device__
NcvRect32s() : x(0), y(0), width(0), height(0) {};
145 __host__ __device__
NcvRect32s(Ncv32s x, Ncv32s y, Ncv32s width, Ncv32s height) : x(x), y(y), width(width), height(height) {}
154 __host__ __device__
NcvRect32u() : x(0), y(0), width(0), height(0) {};
155 __host__ __device__
NcvRect32u(Ncv32u x, Ncv32u y, Ncv32u width, Ncv32u height) : x(x), y(y), width(width), height(height) {}
163 __host__ __device__
NcvSize32s(Ncv32s width, Ncv32s height) : width(width), height(height) {}
171 __host__ __device__
NcvSize32u(Ncv32u width, Ncv32u height) : width(width), height(height) {}
172 __host__ __device__
bool operator == (
const NcvSize32u &another)
const {
return this->width == another.
width && this->height == another.
height;}
191 NCV_CT_ASSERT(
sizeof(NcvBool) <= 4);
192 NCV_CT_ASSERT(
sizeof(Ncv64s) == 8);
193 NCV_CT_ASSERT(
sizeof(Ncv64u) == 8);
194 NCV_CT_ASSERT(
sizeof(Ncv32s) == 4);
195 NCV_CT_ASSERT(
sizeof(Ncv32u) == 4);
196 NCV_CT_ASSERT(
sizeof(Ncv16s) == 2);
197 NCV_CT_ASSERT(
sizeof(Ncv16u) == 2);
198 NCV_CT_ASSERT(
sizeof(Ncv8s) == 1);
199 NCV_CT_ASSERT(
sizeof(Ncv8u) == 1);
200 NCV_CT_ASSERT(
sizeof(Ncv32f) == 4);
201 NCV_CT_ASSERT(
sizeof(Ncv64f) == 8);
202 NCV_CT_ASSERT(
sizeof(
NcvRect8u) ==
sizeof(Ncv32u));
203 NCV_CT_ASSERT(
sizeof(
NcvRect32s) == 4 *
sizeof(Ncv32s));
204 NCV_CT_ASSERT(
sizeof(
NcvRect32u) == 4 *
sizeof(Ncv32u));
205 NCV_CT_ASSERT(
sizeof(
NcvSize32u) == 2 *
sizeof(Ncv32u));
215 const Ncv32u K_WARP_SIZE = 32;
216 const Ncv32u K_LOG2_WARP_SIZE = 5;
224 NCV_EXPORTS
void ncvDebugOutput(
const std::string &msg);
226 typedef void NCVDebugOutputHandler(
const std::string &msg);
228 NCV_EXPORTS
void ncvSetDebugOutputHandler(NCVDebugOutputHandler* func);
230 #define ncvAssertPrintCheck(pred, msg) \ 235 std::ostringstream oss; \ 236 oss << "NCV Assertion Failed: " << msg << ", file=" << __FILE__ << ", line=" << __LINE__ << std::endl; \ 237 ncvDebugOutput(oss.str()); \ 241 #define ncvAssertPrintReturn(pred, msg, err) \ 244 ncvAssertPrintCheck(pred, msg); \ 245 if (!(pred)) return err; \ 248 #define ncvAssertReturn(pred, err) \ 249 ncvAssertPrintReturn(pred, "retcode=" << (int)err, err) 251 #define ncvAssertReturnNcvStat(ncvOp) \ 254 NCVStatus _ncvStat = ncvOp; \ 255 ncvAssertPrintReturn(NCV_SUCCESS==_ncvStat, "NcvStat=" << (int)_ncvStat, _ncvStat); \ 258 #define ncvAssertCUDAReturn(cudacall, errCode) \ 261 cudaError_t res = cudacall; \ 262 ncvAssertPrintReturn(cudaSuccess==res, "cudaError_t=" << res, errCode); \ 265 #define ncvAssertCUDALastErrorReturn(errCode) \ 268 cudaError_t res = cudaGetLastError(); \ 269 ncvAssertPrintReturn(cudaSuccess==res, "cudaError_t=" << res, errCode); \ 286 NCV_INCONSISTENT_INPUT,
287 NCV_TEXTURE_BIND_ERROR,
288 NCV_DIMENSIONS_INVALID,
294 NCV_ALLOCATOR_NOT_INITIALIZED,
295 NCV_ALLOCATOR_BAD_ALLOC,
296 NCV_ALLOCATOR_BAD_DEALLOC,
297 NCV_ALLOCATOR_INSUFFICIENT_CAPACITY,
298 NCV_ALLOCATOR_DEALLOC_ORDER,
299 NCV_ALLOCATOR_BAD_REUSE,
302 NCV_MEM_RESIDENCE_ERROR,
303 NCV_MEM_INSUFFICIENT_CAPACITY,
305 NCV_HAAR_INVALID_PIXEL_STEP,
306 NCV_HAAR_TOO_MANY_FEATURES_IN_CLASSIFIER,
307 NCV_HAAR_TOO_MANY_FEATURES_IN_CASCADE,
308 NCV_HAAR_TOO_LARGE_FEATURES,
309 NCV_HAAR_XML_LOADING_EXCEPTION,
311 NCV_NOIMPL_HAAR_TILTED_FEATURES,
314 NCV_WARNING_HAAR_DETECTIONS_VECTOR_OVERFLOW,
317 NPPST_SUCCESS = NCV_SUCCESS,
319 NPPST_CUDA_KERNEL_EXECUTION_ERROR,
320 NPPST_NULL_POINTER_ERROR,
321 NPPST_TEXTURE_BIND_ERROR,
330 NPPST_MEM_INSUFFICIENT_BUFFER,
331 NPPST_MEM_RESIDENCE_ERROR,
332 NPPST_MEM_INTERNAL_ERROR,
337 typedef Ncv32u NCVStatus;
339 #define NCV_SET_SKIP_COND(x) \ 340 bool __ncv_skip_cond = x 342 #define NCV_RESET_SKIP_COND(x) \ 345 #define NCV_SKIP_COND_BEGIN \ 346 if (!__ncv_skip_cond) { 348 #define NCV_SKIP_COND_END \ 359 typedef struct _NcvTimer *NcvTimer;
361 NCV_EXPORTS NcvTimer ncvStartTimer(
void);
363 NCV_EXPORTS
double ncvEndQueryTimerUs(NcvTimer t);
365 NCV_EXPORTS
double ncvEndQueryTimerMs(NcvTimer t);
378 NCV_EXPORTS Ncv32u alignUp(Ncv32u what, Ncv32u alignment);
387 NCVMemoryTypeHostPageable,
388 NCVMemoryTypeHostPinned,
423 virtual NCVStatus alloc(
NCVMemSegment &seg,
size_t size) = 0;
426 virtual NcvBool isInitialized(
void)
const = 0;
427 virtual NcvBool isCounting(
void)
const = 0;
429 virtual NCVMemoryType memType(
void)
const = 0;
430 virtual Ncv32u alignment(
void)
const = 0;
431 virtual size_t maxSize(
void)
const = 0;
443 NCVMemStackAllocator(
const NCVMemStackAllocator &);
447 explicit NCVMemStackAllocator(Ncv32u alignment);
448 NCVMemStackAllocator(NCVMemoryType memT,
size_t capacity, Ncv32u alignment,
void *reusePtr=NULL);
449 virtual ~NCVMemStackAllocator();
454 virtual NcvBool isInitialized(
void)
const;
455 virtual NcvBool isCounting(
void)
const;
457 virtual NCVMemoryType memType(
void)
const;
458 virtual Ncv32u alignment(
void)
const;
459 virtual size_t maxSize(
void)
const;
463 NCVMemoryType _memType;
470 NcvBool bReusesMemory;
487 virtual NcvBool isInitialized(
void)
const;
488 virtual NcvBool isCounting(
void)
const;
490 virtual NCVMemoryType memType(
void)
const;
491 virtual Ncv32u alignment(
void)
const;
492 virtual size_t maxSize(
void)
const;
499 NCVMemoryType _memType;
509 NCV_EXPORTS NCVStatus memSegCopyHelper(
void *dst, NCVMemoryType dstType,
510 const void *src, NCVMemoryType srcType,
511 size_t sz, cudaStream_t cuStream);
514 NCV_EXPORTS NCVStatus memSegCopyHelper2D(
void *dst, Ncv32u dstPitch, NCVMemoryType dstType,
515 const void *src, Ncv32u srcPitch, NCVMemoryType srcType,
516 Ncv32u widthbytes, Ncv32u height, cudaStream_t cuStream);
540 _memtype = NCVMemoryTypeNone;
547 ncvAssertReturn(dst.
_length == this->_length, NCV_MEM_COPY_ERROR);
548 howMuch = this->_length *
sizeof(T);
552 ncvAssertReturn(dst.
_length *
sizeof(T) >= howMuch &&
553 this->_length *
sizeof(T) >= howMuch &&
554 howMuch > 0, NCV_MEM_COPY_ERROR);
556 ncvAssertReturn((this->_ptr != NULL || this->_memtype == NCVMemoryTypeNone) &&
557 (dst.
_ptr != NULL || dst.
_memtype == NCVMemoryTypeNone), NCV_NULL_PTR);
559 NCVStatus ncvStat = NCV_SUCCESS;
560 if (this->_memtype != NCVMemoryTypeNone)
563 this->_ptr, this->_memtype,
570 T *
ptr()
const {
return this->_ptr;}
571 size_t length()
const {
return this->_length;}
572 NCVMemoryType
memType()
const {
return this->_memtype;}
601 this->allocatedMem.clear();
603 ncvStat = allocator.
alloc(this->allocatedMem, length *
sizeof(T));
604 ncvAssertPrintReturn(ncvStat == NCV_SUCCESS,
"NCVVectorAlloc ctor:: alloc failed", );
606 this->_ptr = (T *)this->allocatedMem.begin.ptr;
607 this->_length = length;
608 this->_memtype = this->allocatedMem.begin.memtype;
615 ncvStat = allocator.dealloc(this->allocatedMem);
616 ncvAssertPrintCheck(ncvStat == NCV_SUCCESS,
"NCVVectorAlloc dtor:: dealloc failed");
623 return (this->allocatedMem.begin.ptr != NULL) || (this->allocator.isCounting());
628 return allocator.alignment();
655 this->bReused =
false;
658 this->_length = memSegment.
size /
sizeof(T);
659 this->_ptr = (T *)memSegment.
begin.
ptr;
662 this->bReused =
true;
667 this->bReused =
false;
670 ncvAssertPrintReturn(length *
sizeof(T) <= memSegment.
size, \
671 "NCVVectorReuse ctor:: memory binding failed due to size mismatch", );
673 this->_length = length;
674 this->_ptr = (T *)memSegment.
begin.
ptr;
677 this->bReused =
true;
682 return this->bReused;
714 _memtype = NCVMemoryTypeNone;
719 return _pitch /
sizeof(T);
727 ncvAssertReturn(dst.
_pitch == this->_pitch &&
728 dst.
_height == this->_height, NCV_MEM_COPY_ERROR);
729 howMuch = this->_pitch * this->_height;
734 this->_pitch * this->_height >= howMuch &&
735 howMuch > 0, NCV_MEM_COPY_ERROR);
737 ncvAssertReturn((this->_ptr != NULL || this->_memtype == NCVMemoryTypeNone) &&
738 (dst.
_ptr != NULL || dst.
_memtype == NCVMemoryTypeNone), NCV_NULL_PTR);
740 NCVStatus ncvStat = NCV_SUCCESS;
741 if (this->_memtype != NCVMemoryTypeNone)
744 this->_ptr, this->_memtype,
753 ncvAssertReturn(this->width() >= roi.
width && this->height() >= roi.
height &&
755 ncvAssertReturn((this->_ptr != NULL || this->_memtype == NCVMemoryTypeNone) &&
756 (dst.
_ptr != NULL || dst.
_memtype == NCVMemoryTypeNone), NCV_NULL_PTR);
758 NCVStatus ncvStat = NCV_SUCCESS;
759 if (this->_memtype != NCVMemoryTypeNone)
762 this->_ptr, this->_pitch, this->_memtype,
769 T &
at(Ncv32u x, Ncv32u y)
const 771 NcvBool bOutRange = (x >= this->_width || y >= this->_height);
772 ncvAssertPrintCheck(!bOutRange,
"Error addressing matrix at [" << x <<
", " << y <<
"]");
777 return ((T *)((Ncv8u *)this->_ptr + y * this->_pitch))[x];
780 T *
ptr()
const {
return this->_ptr;}
781 Ncv32u
width()
const {
return this->_width;}
782 Ncv32u
height()
const {
return this->_height;}
784 Ncv32u
pitch()
const {
return this->_pitch;}
785 NCVMemoryType
memType()
const {
return this->_memtype;}
815 this->allocatedMem.clear();
817 Ncv32u widthBytes = width *
sizeof(T);
818 Ncv32u pitchBytes = alignUp(widthBytes, allocator.
alignment());
822 ncvAssertPrintReturn(pitch >= pitchBytes &&
823 (pitch & (allocator.
alignment() - 1)) == 0,
824 "NCVMatrixAlloc ctor:: incorrect pitch passed", );
828 Ncv32u requiredAllocSize = pitchBytes * height;
830 ncvStat = allocator.
alloc(this->allocatedMem, requiredAllocSize);
831 ncvAssertPrintReturn(ncvStat == NCV_SUCCESS,
"NCVMatrixAlloc ctor:: alloc failed", );
833 this->_ptr = (T *)this->allocatedMem.begin.ptr;
834 this->_width = width;
835 this->_height = height;
836 this->_pitch = pitchBytes;
837 this->_memtype = this->allocatedMem.begin.memtype;
844 ncvStat = allocator.dealloc(this->allocatedMem);
845 ncvAssertPrintCheck(ncvStat == NCV_SUCCESS,
"NCVMatrixAlloc dtor:: dealloc failed");
852 return (this->allocatedMem.begin.ptr != NULL) || (this->allocator.isCounting());
857 return allocator.alignment();
885 this->bReused =
false;
888 Ncv32u widthBytes = width *
sizeof(T);
889 Ncv32u pitchBytes = alignUp(widthBytes, alignment);
893 if (!bSkipPitchCheck)
895 ncvAssertPrintReturn(pitch >= pitchBytes &&
896 (pitch & (alignment - 1)) == 0,
897 "NCVMatrixReuse ctor:: incorrect pitch passed", );
901 ncvAssertPrintReturn(pitch >= widthBytes,
"NCVMatrixReuse ctor:: incorrect pitch passed", );
906 ncvAssertPrintReturn(pitchBytes * height <= memSegment.
size, \
907 "NCVMatrixReuse ctor:: memory binding failed due to size mismatch", );
909 this->_width = width;
910 this->_height = height;
911 this->_pitch = pitchBytes;
912 this->_ptr = (T *)memSegment.
begin.
ptr;
915 this->bReused =
true;
920 this->bReused =
false;
923 ncvAssertPrintReturn(roi.
x < mat.
width() && roi.
y < mat.
height() && \
925 "NCVMatrixReuse ctor:: memory binding failed due to mismatching ROI and source matrix dims", );
927 this->_width = roi.
width;
928 this->_height = roi.
height;
929 this->_pitch = mat.
pitch();
930 this->_ptr = &mat.
at(roi.
x, roi.
y);
931 this->_memtype = mat.
memType();
933 this->bReused =
true;
938 return this->bReused;
949 NCV_EXPORTS NCVStatus ncvGroupRectangles_host(
NCVVector<NcvRect32u> &hypotheses, Ncv32u &numHypotheses,
952 NCV_EXPORTS NCVStatus ncvDrawRects_8u_host(Ncv8u *h_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight,
953 NcvRect32u *h_rects, Ncv32u numRects, Ncv8u color);
955 NCV_EXPORTS NCVStatus ncvDrawRects_32u_host(Ncv32u *h_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight,
956 NcvRect32u *h_rects, Ncv32u numRects, Ncv32u color);
958 NCV_EXPORTS NCVStatus ncvDrawRects_8u_device(Ncv8u *d_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight,
959 NcvRect32u *d_rects, Ncv32u numRects, Ncv8u color, cudaStream_t cuStream);
961 NCV_EXPORTS NCVStatus ncvDrawRects_32u_device(Ncv32u *d_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight,
962 NcvRect32u *d_rects, Ncv32u numRects, Ncv32u color, cudaStream_t cuStream);
964 #define CLAMP(x,a,b) ( (x) > (b) ? (b) : ( (x) < (a) ? (a) : (x) ) ) 965 #define CLAMP_TOP(x, a) (((x) > (a)) ? (a) : (x)) 966 #define CLAMP_BOTTOM(x, a) (((x) < (a)) ? (a) : (x)) 967 #define CLAMP_0_255(x) CLAMP(x,0,255) 969 #define SUB_BEGIN(type, name) struct { __inline type name 970 #define SUB_END(name) } name; 971 #define SUB_CALL(name) name.name 973 #define SQR(x) ((x)*(x)) 975 #define ncvSafeMatAlloc(name, type, alloc, width, height, err) \ 976 NCVMatrixAlloc<type> name(alloc, width, height); \ 977 ncvAssertReturn(name.isMemAllocated(), err); 979 #endif // PCL_GPU_PEOPLE__NCV_HPP_ __host__ __device__ NcvRect32u()
NCVMatrixAlloc(INCVMemAllocator &allocator, Ncv32u width, Ncv32u height, Ncv32u pitch=0)
NcvBool isMemAllocated() const
__host__ __device__ NcvSize32s(Ncv32s width, Ncv32s height)
NcvBool isMemReused() const
Compile-time assert namespace.
Ncv32s height
Rectangle height.
virtual NCVStatus alloc(NCVMemSegment &seg, size_t size)=0
NcvBool isMemReused() const
NCVMemoryType memType() const
Ncv32s width
Rectangle width.
NCVStatus copySolid(NCVVector< T > &dst, cudaStream_t cuStream, size_t howMuch=0) const
NCVVectorReuse(const NCVMemSegment &memSegment)
Ncv32u height
Rectangle height.
Ncv32u getAllocatorsAlignment() const
Ncv32s width
Rectangle width.
__host__ __device__ NcvPoint2D32s(Ncv32s x, Ncv32s y)
Ncv32u width
Rectangle width.
__host__ __device__ NcvSize32s()
NCVStatus copy2D(NCVMatrix< T > &dst, NcvSize32u roi, cudaStream_t cuStream) const
__host__ __device__ NcvPoint2D32u(Ncv32u x, Ncv32u y)
NCVMatrixReuse(const NCVMemSegment &memSegment, Ncv32u alignment, Ncv32u width, Ncv32u height, Ncv32u pitch=0, NcvBool bSkipPitchCheck=false)
Ncv32u y
y-coordinate of upper left corner.
NCVVectorReuse(const NCVMemSegment &memSegment, Ncv32u length)
__host__ __device__ NcvPoint2D32u()
Ncv32s height
Rectangle height.
__host__ __device__ NcvSize32u(Ncv32u width, Ncv32u height)
__host__ __device__ NcvPoint2D32s()
T & at(Ncv32u x, Ncv32u y) const
virtual Ncv32u alignment(void) const =0
NcvBool isMemAllocated() const
__host__ __device__ NcvSize32u()
NCVMemSegment getSegment() const
INCVMemAllocator (Interface)
Ncv32u height
Rectangle height.
NCVMemSegment getSegment() const
NCVVectorAlloc(INCVMemAllocator &allocator, Ncv32u length)
virtual ~INCVMemAllocator()=0
__host__ __device__ NcvRect32s()
NCVMemoryType memType() const
Ncv32s y
y-coordinate of upper left corner.
__host__ __device__ NcvRect8u(Ncv8u x, Ncv8u y, Ncv8u width, Ncv8u height)
Ncv32u x
x-coordinate of upper left corner.
Ncv32u width
Rectangle width.
Ncv32s x
x-coordinate of upper left corner.
__host__ __device__ NcvRect8u()
__host__ __device__ NcvRect32s(Ncv32s x, Ncv32s y, Ncv32s width, Ncv32s height)
Ncv32u getAllocatorsAlignment() const
NCVStatus copySolid(NCVMatrix< T > &dst, cudaStream_t cuStream, size_t howMuch=0) const
__host__ __device__ NcvRect32u(Ncv32u x, Ncv32u y, Ncv32u width, Ncv32u height)
NCVMatrixReuse(const NCVMatrix< T > &mat, NcvRect32u roi)