1 #ifndef VIENNACL_LINALG_OPENCL_VECTOR_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_OPENCL_VECTOR_OPERATIONS_HPP_
54 template <
typename T,
typename ScalarType1>
56 vector_base<T> const & vec2, ScalarType1
const & alpha,
vcl_size_t len_alpha,
bool reciprocal_alpha,
bool flip_sign_alpha)
58 assert(viennacl::traits::opencl_handle(vec1).
context() == viennacl::traits::opencl_handle(vec2).
context() &&
bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
86 viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<T>(alpha)),
88 viennacl::traits::opencl_handle(vec2),
94 template <
typename T,
typename ScalarType1,
typename ScalarType2>
96 vector_base<T> const & vec2, ScalarType1
const & alpha,
vcl_size_t len_alpha,
bool reciprocal_alpha,
bool flip_sign_alpha,
97 vector_base<T> const & vec3, ScalarType2
const & beta,
vcl_size_t len_beta,
bool reciprocal_beta,
bool flip_sign_beta)
99 assert(viennacl::traits::opencl_handle(vec1).
context() == viennacl::traits::opencl_handle(vec2).
context() &&
bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
100 assert(viennacl::traits::opencl_handle(vec2).
context() == viennacl::traits::opencl_handle(vec3).
context() &&
bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
105 std::string kernel_name;
107 kernel_name =
"avbv_cpu_cpu";
109 kernel_name =
"avbv_cpu_gpu";
111 kernel_name =
"avbv_gpu_cpu";
113 kernel_name =
"avbv_gpu_gpu";
143 viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<T>(alpha)),
145 viennacl::traits::opencl_handle(vec2),
148 viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<T>(beta)),
150 viennacl::traits::opencl_handle(vec3),
156 template <
typename T,
typename ScalarType1,
typename ScalarType2>
158 vector_base<T> const & vec2, ScalarType1
const & alpha,
vcl_size_t len_alpha,
bool reciprocal_alpha,
bool flip_sign_alpha,
159 vector_base<T> const & vec3, ScalarType2
const & beta,
vcl_size_t len_beta,
bool reciprocal_beta,
bool flip_sign_beta)
161 assert(viennacl::traits::opencl_handle(vec1).
context() == viennacl::traits::opencl_handle(vec2).
context() &&
bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
162 assert(viennacl::traits::opencl_handle(vec2).
context() == viennacl::traits::opencl_handle(vec3).
context() &&
bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
167 std::string kernel_name;
169 kernel_name =
"avbv_v_cpu_cpu";
171 kernel_name =
"avbv_v_cpu_gpu";
173 kernel_name =
"avbv_v_gpu_cpu";
175 kernel_name =
"avbv_v_gpu_gpu";
205 viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<T>(alpha)),
207 viennacl::traits::opencl_handle(vec2),
210 viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<T>(beta)),
212 viennacl::traits::opencl_handle(vec3),
224 template <
typename T>
240 viennacl::traits::opencl_handle(T(alpha)) )
250 template <
typename T>
253 assert(viennacl::traits::opencl_handle(vec1).
context() == viennacl::traits::opencl_handle(vec2).
context() &&
bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
264 viennacl::traits::opencl_handle(vec2),
278 template <
typename T,
typename OP>
282 assert(viennacl::traits::opencl_handle(vec1).
context() == viennacl::traits::opencl_handle(proxy.lhs()).
context() && bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
283 assert(viennacl::traits::opencl_handle(vec1).
context() == viennacl::traits::opencl_handle(proxy.rhs()).
context() && bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
301 viennacl::traits::opencl_handle(proxy.lhs()),
305 viennacl::traits::opencl_handle(proxy.rhs()),
320 template <
typename T,
typename OP>
324 assert(viennacl::traits::opencl_handle(vec1).
context() == viennacl::traits::opencl_handle(proxy.lhs()).
context() && bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
325 assert(viennacl::traits::opencl_handle(vec1).
context() == viennacl::traits::opencl_handle(proxy.rhs()).
context() && bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
346 viennacl::traits::opencl_handle(proxy.lhs()),
359 template <
typename T>
364 assert(viennacl::traits::opencl_handle(vec1).
context() == viennacl::traits::opencl_handle(vec2).
context() &&
bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
365 assert(viennacl::traits::opencl_handle(vec2).
context() == viennacl::traits::opencl_handle(partial_result).
context() &&
bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
371 &&
bool(
"Incompatible vector sizes in inner_prod_impl()!"));
391 viennacl::traits::opencl_handle(vec2),
394 viennacl::traits::opencl_handle(partial_result)
408 template <
typename T>
413 assert(viennacl::traits::opencl_handle(vec1).
context() == viennacl::traits::opencl_handle(vec2).
context() &&
bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
414 assert(viennacl::traits::opencl_handle(vec1).
context() == viennacl::traits::opencl_handle(result).
context() &&
bool(
"Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));
420 temp.
resize(work_groups, ctx);
436 viennacl::traits::opencl_handle(result) )
442 template <
typename ScalarT>
460 template <
typename T>
465 assert(viennacl::traits::opencl_handle(x).
context() == viennacl::traits::opencl_handle(result).
context() &&
bool(
"Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));
474 temp.
resize(8 * work_groups, ctx);
486 while (current_index < vec_tuple.
const_size())
488 switch (vec_tuple.
const_size() - current_index)
505 viennacl::traits::opencl_handle(temp)
512 viennacl::traits::opencl_handle(result),
531 viennacl::traits::opencl_handle(temp)
538 viennacl::traits::opencl_handle(result),
555 viennacl::traits::opencl_handle(temp)
562 viennacl::traits::opencl_handle(result),
577 viennacl::traits::opencl_handle(temp)
584 viennacl::traits::opencl_handle(result),
613 viennacl::traits::opencl_handle(temp)
620 viennacl::traits::opencl_handle(result),
642 template <
typename T>
647 assert(viennacl::traits::opencl_handle(vec1).
context() == viennacl::traits::opencl_handle(vec2).
context() &&
bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
653 temp.
resize(work_groups, ctx);
661 std::vector<T> temp_cpu(work_groups);
665 for (
typename std::vector<T>::const_iterator it = temp_cpu.begin(); it != temp_cpu.end(); ++it)
678 template <
typename T>
683 assert(viennacl::traits::opencl_handle(vec).
context() == viennacl::traits::opencl_handle(partial_result).
context() &&
bool(
"Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));
698 viennacl::traits::opencl_handle(partial_result) )
710 template <
typename T>
714 assert(viennacl::traits::opencl_handle(vec).
context() == viennacl::traits::opencl_handle(result).
context() &&
bool(
"Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));
744 template <
typename T>
755 typedef std::vector<typename viennacl::result_of::cl_type<T>::type> CPUVectorType;
757 CPUVectorType temp_cpu(work_groups);
761 for (
typename CPUVectorType::const_iterator it = temp_cpu.begin(); it != temp_cpu.end(); ++it)
762 result += static_cast<T>(*it);
775 template <
typename T>
779 assert(viennacl::traits::opencl_handle(vec).
context() == viennacl::traits::opencl_handle(result).
context() &&
bool(
"Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));
809 template <
typename T>
820 typedef std::vector<typename viennacl::result_of::cl_type<T>::type> CPUVectorType;
822 CPUVectorType temp_cpu(work_groups);
826 for (
typename CPUVectorType::const_iterator it = temp_cpu.begin(); it != temp_cpu.end(); ++it)
827 result += static_cast<T>(*it);
828 result = std::sqrt(result);
840 template <
typename T>
844 assert(viennacl::traits::opencl_handle(vec).
context() == viennacl::traits::opencl_handle(result).
context() &&
bool(
"Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));
874 template <
typename T>
885 typedef std::vector<typename viennacl::result_of::cl_type<T>::type> CPUVectorType;
887 CPUVectorType temp_cpu(work_groups);
891 for (
typename CPUVectorType::const_iterator it = temp_cpu.begin(); it != temp_cpu.end(); ++it)
892 result = std::max(result, static_cast<T>(*it));
906 template <
typename T>
929 cl_int err = clEnqueueReadBuffer(ctx.
get_queue().
handle().
get(), h.
get(), CL_TRUE, 0,
sizeof(cl_uint), &result, 0, NULL, NULL);
944 template <
typename T>
949 assert(viennacl::traits::opencl_handle(vec1).
context() == viennacl::traits::opencl_handle(vec2).
context() &&
bool(
"Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));
961 viennacl::traits::opencl_handle(vec2),
965 viennacl::traits::opencl_handle(alpha),
966 viennacl::traits::opencl_handle(beta))
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context.
Definition: context.hpp:470
VectorType const & const_at(vcl_size_t i) const
Definition: vector.hpp:1196
std::size_t vcl_size_t
Definition: forwards.h:58
Helper class for packing four cl_uint numbers into a uint4 type for access inside an OpenCL kernel...
Definition: kernel.hpp:46
This class represents a single scalar value on the GPU and behaves mostly like a built-in scalar type...
Definition: forwards.h:172
void avbv(vector_base< T > &vec1, vector_base< T > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< T > const &vec3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
Definition: vector_operations.hpp:95
size_type size() const
Returns the length of the vector (cf. std::vector)
Definition: vector.hpp:859
void norm_2_impl(vector_base< T > const &vec, scalar< T > &result)
Computes the l^2-norm of a vector - implementation using OpenCL summation at second step...
Definition: vector_operations.hpp:776
Represents an OpenCL device within ViennaCL.
Common implementations shared by OpenCL-based operations.
void norm_1_cpu(vector_base< T > const &vec, T &result)
Computes the l^1-norm of a vector with final reduction on CPU.
Definition: vector_operations.hpp:745
Generic size and resize functionality for different vector and matrix types.
void plane_rotation(vector_base< T > &vec1, vector_base< T > &vec2, T alpha, T beta)
Computes a plane rotation of two vectors.
Definition: vector_operations.hpp:945
Represents an OpenCL kernel within ViennaCL.
Definition: kernel.hpp:59
Extracts the underlying OpenCL start index handle from a vector, a matrix, an expression etc...
viennacl::ocl::packed_cl_uint make_layout(vector_base< ScalarT > const &vec)
Definition: vector_operations.hpp:443
void norm_reduction_impl(vector_base< T > const &vec, vector_base< T > &partial_result, cl_uint norm_id)
Computes the partial work group results for vector norms.
Definition: vector_operations.hpp:679
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:51
Main kernel class for generating OpenCL kernels for multiple inner products on/with viennacl::vector<...
Definition: vector.hpp:646
void norm_inf_impl(vector_base< T > const &vec, scalar< T > &result)
Computes the supremum-norm of a vector.
Definition: vector_operations.hpp:841
cl_uint start
Starting value of the integer stride.
Definition: kernel.hpp:49
result_of::size_type< viennacl::vector_base< T > >::type stride(viennacl::vector_base< T > const &s)
Definition: stride.hpp:46
This file provides the forward declarations for the main types used within ViennaCL.
Determines row and column increments for matrices and matrix proxies.
static void init(viennacl::ocl::context &ctx)
Definition: vector_element.hpp:99
viennacl::ocl::handle< cl_command_queue > const & handle() const
Definition: command_queue.hpp:81
vcl_size_t internal_size(vector_base< NumericT > const &vec)
Helper routine for obtaining the buffer length of a ViennaCL vector.
Definition: size.hpp:268
An expression template class that represents a binary operation that yields a vector.
Definition: forwards.h:181
void avbv_v(vector_base< T > &vec1, vector_base< T > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< T > const &vec3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
Definition: vector_operations.hpp:157
#define VIENNACL_ERR_CHECK(err)
Definition: error.hpp:655
void inner_prod_impl(vector_base< T > const &vec1, vector_base< T > const &vec2, vector_base< T > &partial_result)
Computes the partial inner product of two vectors - implementation. Library users should call inner_p...
Definition: vector_operations.hpp:360
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:144
viennacl::ocl::command_queue & get_queue()
Definition: context.hpp:249
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Definition: enqueue.hpp:48
A class representing local (shared) OpenCL memory. Typically used as kernel argument.
Definition: local_mem.hpp:33
viennacl::ocl::handle< cl_mem > create_memory(cl_mem_flags flags, unsigned int size, void *ptr=NULL) const
Creates a memory buffer within the context.
Definition: context.hpp:199
Helper struct for checking whether a type is a host scalar type (e.g. float, double) ...
Definition: forwards.h:363
const OCL_TYPE & get() const
Definition: handle.hpp:189
void norm_2_cpu(vector_base< T > const &vec, T &result)
Computes the l^2-norm of a vector with final reduction on CPU.
Definition: vector_operations.hpp:810
Tuple class holding pointers to multiple vectors. Mainly used as a temporary object returned from vie...
Definition: forwards.h:211
OpenCL kernel file for vector operations.
void element_op(matrix_base< T, F > &A, matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_binary< OP > > const &proxy)
Implementation of binary element-wise operations A = OP(B,C)
Definition: matrix_operations.hpp:460
Implementation of a smart-pointer-like class for handling OpenCL handles.
result_of::size_type< T >::type start(T const &obj)
Definition: start.hpp:43
void av(vector_base< T > &vec1, vector_base< T > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
Definition: vector_operations.hpp:55
cl_uint make_options(vcl_size_t length, bool reciprocal, bool flip_sign)
Definition: common.hpp:39
void resize(size_type new_size, bool preserve=true)
Resizes the allocated memory for the vector. Pads the memory to be a multiple of 'ALIGNMENT'.
Definition: vector.hpp:1096
void norm_1_impl(vector_base< T > const &vec, scalar< T > &result)
Computes the l^1-norm of a vector.
Definition: vector_operations.hpp:711
Common base class for dense vectors, vector ranges, and vector slices.
Definition: forwards.h:205
A vector class representing a linear memory sequence on the GPU. Inspired by boost::numeric::ublas::v...
Definition: forwards.h:208
Helper metafunction for checking whether the provided type is viennacl::op_div (for division) ...
Definition: predicate.hpp:448
Main kernel class for generating OpenCL kernels for elementwise operations other than addition and su...
Definition: vector_element.hpp:92
T type
Definition: result_of.hpp:590
All the predicates used within ViennaCL. Checks for expressions to be vectors, etc.
iterator begin()
Returns an iterator pointing to the beginning of the vector (STL like)
Definition: vector.hpp:825
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object.
Definition: context.hpp:41
Representation of an OpenCL kernel in ViennaCL.
cl_uint index_norm_inf(vector_base< T > const &vec)
Computes the index of the first entry that is equal to the supremum-norm in modulus.
Definition: vector_operations.hpp:907
cl_uint stride
Increment between integers.
Definition: kernel.hpp:51
void norm_inf_cpu(vector_base< T > const &vec, T &result)
Computes the supremum-norm of a vector with final reduction on CPU.
Definition: vector_operations.hpp:875
iterator end()
Returns an iterator pointing to the end of the vector (STL like)
Definition: vector.hpp:831
A tag class representing element-wise binary operations (like multiplication) on vectors or matrices...
Definition: forwards.h:86
OpenCL kernel file for element-wise vector operations.
size_type global_work_size(int index=0) const
Returns the global work size at the respective dimension.
Definition: kernel.hpp:759
Extracts the underlying OpenCL handle from a vector, a matrix, an expression etc. ...
void vector_assign(vector_base< T > &vec1, const T &alpha, bool up_to_internal_size=false)
Assign a constant value to a vector (-range/-slice)
Definition: vector_operations.hpp:225
Helper metafunction for checking whether the provided type is viennacl::op_prod (for products/multipl...
Definition: predicate.hpp:418
static void init(viennacl::ocl::context &ctx)
Definition: vector.hpp:607
size_type local_work_size(int index=0) const
Returns the local work size at the respective dimension.
Definition: kernel.hpp:750
std::string op_to_string(op_abs)
Definition: common.hpp:71
cl_uint size
Number of values in the stride.
Definition: kernel.hpp:53
void vector_swap(vector_base< T > &vec1, vector_base< T > &vec2)
Swaps the contents of two vectors, data is copied.
Definition: vector_operations.hpp:251
A tag class representing element-wise unary operations (like sin()) on vectors or matrices...
Definition: forwards.h:90
void inner_prod_cpu(vector_base< T > const &vec1, vector_base< T > const &vec2, T &result)
Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1...
Definition: vector_operations.hpp:643
Implementation of the ViennaCL scalar class.
void fast_copy(const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_begin, const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_end, CPU_ITERATOR cpu_begin)
STL-like transfer of a GPU vector to the CPU. The cpu type is assumed to reside in a linear piece of ...
Definition: vector.hpp:1284
size_type internal_size() const
Returns the internal length of the vector, which is given by size() plus the extra memory due to padd...
Definition: vector.hpp:863
static void init(viennacl::ocl::context &ctx)
Definition: vector.hpp:653
Main kernel class for generating OpenCL kernels for operations on/with viennacl::vector<> without inv...
Definition: vector.hpp:600
Simple enable-if variant that uses the SFINAE pattern.
cl_uint internal_size
Internal length of the buffer. Might be larger than 'size' due to padding.
Definition: kernel.hpp:55
vcl_size_t const_size() const
Definition: vector.hpp:1193