1 #ifndef VIENNACL_LINALG_OPENCL_MATRIX_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_OPENCL_MATRIX_OPERATIONS_HPP_
64 template <
typename NumericT,
typename F,
69 typedef NumericT value_type;
73 KernelClass::init(ctx);
85 viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<value_type>(alpha)),
87 viennacl::traits::opencl_handle(mat2),
96 template <
typename NumericT,
typename F,
97 typename ScalarType1,
typename ScalarType2>
102 typedef NumericT value_type;
106 KernelClass::init(ctx);
108 std::string kernel_name;
110 kernel_name =
"ambm_cpu_cpu";
112 kernel_name =
"ambm_cpu_gpu";
114 kernel_name =
"ambm_gpu_cpu";
116 kernel_name =
"ambm_gpu_gpu";
128 viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<value_type>(alpha)),
130 viennacl::traits::opencl_handle(mat2),
135 viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<value_type>(beta)),
137 viennacl::traits::opencl_handle(mat3),
146 template <
typename NumericT,
typename F,
147 typename ScalarType1,
typename ScalarType2>
152 typedef NumericT value_type;
156 KernelClass::init(ctx);
158 std::string kernel_name;
160 kernel_name =
"ambm_m_cpu_cpu";
162 kernel_name =
"ambm_m_cpu_gpu";
164 kernel_name =
"ambm_m_gpu_cpu";
166 kernel_name =
"ambm_m_gpu_gpu";
178 viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<value_type>(alpha)),
180 viennacl::traits::opencl_handle(mat2),
185 viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<value_type>(beta)),
187 viennacl::traits::opencl_handle(mat3),
197 template <
typename NumericT,
typename F>
200 typedef NumericT value_type;
204 KernelClass::init(ctx);
206 value_type alpha =
static_cast<value_type
>(s);
217 viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<value_type>(alpha))
222 template <
typename NumericT,
typename F>
225 typedef NumericT value_type;
229 KernelClass::init(ctx);
231 value_type alpha =
static_cast<value_type
>(s);
239 viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<value_type>(alpha))
244 template <
typename NumericT,
typename F>
255 KernelClass::init(ctx);
257 cl_uint options_alpha = 0;
298 viennacl::traits::opencl_handle(NumericT(1)),
300 viennacl::traits::opencl_handle(vec),
305 template <
typename NumericT,
typename F>
311 KernelClass::init(ctx);
313 cl_uint options_alpha = 0;
355 viennacl::traits::opencl_handle(NumericT(1)),
357 viennacl::traits::opencl_handle(mat),
362 template <
typename NumericT,
typename F>
368 KernelClass::init(ctx);
370 cl_uint options_alpha = 0;
398 viennacl::traits::opencl_handle(NumericT(1)),
400 viennacl::traits::opencl_handle(mat),
405 template <
typename NumericT,
typename F>
411 KernelClass::init(ctx);
413 cl_uint options_alpha = 0;
441 viennacl::traits::opencl_handle(NumericT(1)),
443 viennacl::traits::opencl_handle(mat),
459 template <
typename T,
typename F,
typename OP>
463 assert(viennacl::traits::opencl_handle(A).
context() == viennacl::traits::opencl_handle(proxy.lhs()).
context() && bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
464 assert(viennacl::traits::opencl_handle(A).
context() == viennacl::traits::opencl_handle(proxy.rhs()).
context() && bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
468 KernelClass::init(ctx);
484 viennacl::traits::opencl_handle(proxy.lhs()),
489 viennacl::traits::opencl_handle(proxy.rhs()),
506 template <
typename T,
typename F,
typename OP>
510 assert(viennacl::traits::opencl_handle(A).
context() == viennacl::traits::opencl_handle(proxy.lhs()).
context() && bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
511 assert(viennacl::traits::opencl_handle(A).
context() == viennacl::traits::opencl_handle(proxy.rhs()).
context() && bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
524 viennacl::traits::opencl_handle(proxy.lhs()),
546 template <
typename NumericT,
typename F>
551 typedef NumericT value_type;
555 KernelClass::init(ctx);
569 viennacl::traits::opencl_handle(vec),
574 viennacl::traits::opencl_handle(result),
594 template <
typename NumericT,
typename F>
605 KernelClass::init(ctx);
619 viennacl::traits::opencl_handle(vec),
624 viennacl::traits::opencl_handle(result),
641 template <
typename T1,
typename T2,
typename T3,
typename ScalarType >
647 std::string kernel_name)
657 KernelClass::init(ctx);
667 cpu_value_type cl_alpha =
static_cast<cpu_value_type
>(alpha);
668 cpu_value_type cl_beta =
static_cast<cpu_value_type
>(beta);
671 viennacl::traits::opencl_handle(A),
677 viennacl::traits::opencl_handle(B),
684 viennacl::traits::opencl_handle(C),
694 template <
typename T1,
typename T2,
typename T3,
typename ScalarType >
700 std::string kernel_name)
710 KernelClass::init(ctx);
720 cpu_value_type cl_alpha =
static_cast<cpu_value_type
>(alpha);
721 cpu_value_type cl_beta =
static_cast<cpu_value_type
>(beta);
724 viennacl::traits::opencl_handle(A),
730 viennacl::traits::opencl_handle(B),
737 viennacl::traits::opencl_handle(C),
746 template <
typename T1,
typename T2,
typename T3,
typename ScalarType >
752 std::string fast_kernel_name,
753 std::string slow_kernel_name)
784 template <
typename NumericT,
typename F1,
typename F2,
typename F3,
typename ScalarType >
806 detail::prod(A, B, C, alpha, beta,
"prod16_AA",
"prod_AA");
820 template <
typename NumericT,
typename F1,
typename F2,
typename F3,
typename ScalarType >
845 if(A_not_aligned || A.lhs().start1() > 0 || A.lhs().start2() > 0 || A.lhs().stride1() > 1 || A.lhs().stride2() > 1
848 detail::prod(A.lhs(), B, C, alpha, beta,
"prod16_TA",
"prod_TA");
864 template <
typename NumericT,
typename F1,
typename F2,
typename F3,
typename ScalarType >
885 ||B_not_aligned || B.lhs().start1() > 0 || B.lhs().start2() > 0 || B.lhs().stride1() > 1 || B.lhs().stride2() > 1
887 detail::prod(A, B.lhs(), C, alpha, beta,
"prod16_AT",
"prod_AT");
902 template <
typename NumericT,
typename F1,
typename F2,
typename F3,
typename ScalarType >
922 if(A_not_aligned || A.lhs().start1() > 0 || A.lhs().start2() > 0 || A.lhs().stride1() > 1 || A.lhs().stride2() > 1
923 ||B_not_aligned || B.lhs().start1() > 0 || B.lhs().start2() > 0 || B.lhs().stride1() > 1 || B.lhs().stride2() > 1
925 detail::prod(A.lhs(), B.lhs(), C, alpha, beta,
"prod16_TT",
"prod_TT");
954 template <
typename NumericT,
typename F,
typename S1>
956 S1
const & alpha,
vcl_size_t len_alpha,
bool reciprocal_alpha,
bool flip_sign_alpha,
965 KernelClass::init(ctx);
977 viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<NumericT>(alpha)),
980 viennacl::traits::opencl_handle(vec1),
985 viennacl::traits::opencl_handle(vec2),
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context.
Definition: context.hpp:470
std::size_t vcl_size_t
Definition: forwards.h:58
Helper class for packing four cl_uint numbers into a uint4 type for access inside an OpenCL kernel...
Definition: kernel.hpp:46
void ambm_m(matrix_base< NumericT, F > &mat1, matrix_base< NumericT, F > const &mat2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT, F > const &mat3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
Definition: matrix_operations.hpp:148
size_type size() const
Returns the length of the vector (cf. std::vector)
Definition: vector.hpp:859
result_of::size_type< matrix_base< NumericT, F > >::type stride2(matrix_base< NumericT, F > const &s)
Definition: stride.hpp:68
Represents an OpenCL device within ViennaCL.
Common implementations shared by OpenCL-based operations.
Generic size and resize functionality for different vector and matrix types.
Helper class for checking whether a matrix has a row-major layout.
Definition: forwards.h:399
Represents an OpenCL kernel within ViennaCL.
Definition: kernel.hpp:59
Extracts the underlying OpenCL start index handle from a vector, a matrix, an expression etc...
Runtime generation of OpenCL kernels for dense matrix-matrix products.
void matrix_column(const matrix_base< NumericT, F > &mat, unsigned int j, vector_base< NumericT > &vec)
Definition: matrix_operations.hpp:406
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
Definition: size.hpp:216
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:51
size_type internal_size1() const
Returns the internal number of rows. Usually required for launching OpenCL kernels only...
Definition: matrix.hpp:647
A tag class representing assignment.
Definition: forwards.h:63
A dense matrix class.
Definition: forwards.h:290
Expression template class for representing a tree of expressions which ultimately result in a matrix...
Definition: forwards.h:283
size_type stride2() const
Returns the stride (increment between consecutive entries) in the column dimension.
Definition: matrix.hpp:637
cl_uint start
Starting value of the integer stride.
Definition: kernel.hpp:49
void prod_slow_kernel(const T1 &A, const T2 &B, T3 &C, ScalarType alpha, ScalarType beta, std::string kernel_name)
Definition: matrix_operations.hpp:642
result_of::size_type< viennacl::vector_base< T > >::type stride(viennacl::vector_base< T > const &s)
Definition: stride.hpp:46
void clear(VectorType &vec)
Generic routine for setting all entries of a vector to zero. This is the version for non-ViennaCL obj...
Definition: clear.hpp:57
This file provides the forward declarations for the main types used within ViennaCL.
result_of::size_type< T >::type start1(T const &obj)
Definition: start.hpp:64
Determines row and column increments for matrices and matrix proxies.
Main kernel class for the generation of matrix-matrix product kernels C = A * B.
Definition: matrix_prod.hpp:431
vcl_size_t internal_size(vector_base< NumericT > const &vec)
Helper routine for obtaining the buffer length of a ViennaCL vector.
Definition: size.hpp:268
The user interface for the code generator.
result_of::size_type< MatrixType >::type size2(MatrixType const &mat)
Generic routine for obtaining the number of columns of a matrix (ViennaCL, uBLAS, etc...
Definition: size.hpp:245
size_type size2() const
Returns the number of columns.
Definition: matrix.hpp:627
result_of::size_type< matrix_base< NumericT, F > >::type stride1(matrix_base< NumericT, F > const &s)
Definition: stride.hpp:57
Main kernel class for generating OpenCL kernels for elementwise-operations such as element_sin() on/w...
Definition: matrix_element.hpp:77
void matrix_diag_from_vector(const vector_base< NumericT > &vec, int k, matrix_base< NumericT, F > &mat)
Definition: matrix_operations.hpp:245
void am(matrix_base< NumericT, F > &mat1, matrix_base< NumericT, F > const &mat2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
Definition: matrix_operations.hpp:66
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:144
result_of::size_type< T >::type start2(T const &obj)
Definition: start.hpp:83
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Definition: enqueue.hpp:48
A class representing local (shared) OpenCL memory. Typically used as kernel argument.
Definition: local_mem.hpp:33
Helper struct for checking whether a type is a host scalar type (e.g. float, double) ...
Definition: forwards.h:363
size_type stride1() const
Returns the stride (increment between consecutive entries) in the row dimension.
Definition: matrix.hpp:635
void generate_enqueue_statement(viennacl::scheduler::statement const &s, scheduler::statement_node const &root_node)
Generate and enqueue a statement plus root_node into the current queue.
Definition: generate.hpp:395
size_type start1() const
Returns the start index in the row dimension.
Definition: matrix.hpp:630
size_type start2() const
Returns the start index in the column dimension.
Definition: matrix.hpp:632
void element_op(matrix_base< T, F > &A, matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_binary< OP > > const &proxy)
Implementation of binary element-wise operations A = OP(B,C)
Definition: matrix_operations.hpp:460
Implementation of a smart-pointer-like class for handling OpenCL handles.
result_of::size_type< T >::type start(T const &obj)
Definition: start.hpp:43
cl_uint make_options(vcl_size_t length, bool reciprocal, bool flip_sign)
Definition: common.hpp:39
void matrix_assign(matrix_base< NumericT, F > &mat, NumericT s, bool clear=false)
Definition: matrix_operations.hpp:198
void matrix_row(const matrix_base< NumericT, F > &mat, unsigned int i, vector_base< NumericT > &vec)
Definition: matrix_operations.hpp:363
Common base class for dense vectors, vector ranges, and vector slices.
Definition: forwards.h:205
void prod(const T1 &A, const T2 &B, T3 &C, ScalarType alpha, ScalarType beta, std::string fast_kernel_name, std::string slow_kernel_name)
Definition: matrix_operations.hpp:747
void matrix_diagonal_assign(matrix_base< NumericT, F > &mat, NumericT s)
Definition: matrix_operations.hpp:223
A tag class representing matrix-matrix products.
Definition: forwards.h:78
Provides the datastructures for dealing with a single statement such as 'x = y + z;'.
Helper metafunction for checking whether the provided type is viennacl::op_div (for division) ...
Definition: predicate.hpp:448
vcl_size_t internal_size2(matrix_base< NumericT, F > const &mat)
Helper routine for obtaining the internal number of entries per column of a ViennaCL matrix...
Definition: size.hpp:287
Proxy classes for vectors.
All the predicates used within ViennaCL. Checks for expressions to be vectors, etc.
static void init(viennacl::ocl::context &ctx)
Definition: matrix_element.hpp:84
T::ERROR_ARGUMENT_PROVIDED_IS_NOT_A_MATRIX type
Definition: result_of.hpp:137
Main kernel class for generating OpenCL kernels for operations on/with dense matrix objects of type v...
Definition: matrix.hpp:877
viennacl::context context(T const &t)
Returns the viennacl::context object for the currently active memory domain of an object.
Definition: context.hpp:41
Representation of an OpenCL kernel in ViennaCL.
T::ERROR_CANNOT_DEDUCE_CPU_SCALAR_TYPE_FOR_T type
Definition: result_of.hpp:276
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
void scaled_rank_1_update(matrix_base< NumericT, F > &mat1, S1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, const vector_base< NumericT > &vec1, const vector_base< NumericT > &vec2)
The implementation of the operation mat += alpha * vec1 * vec2^T, i.e. a scaled rank 1 update...
Definition: matrix_operations.hpp:955
cl_uint stride
Increment between integers.
Definition: kernel.hpp:51
A tag class representing transposed matrices.
Definition: forwards.h:165
void prod_impl(const matrix_base< NumericT, F > &mat, const vector_base< NumericT > &vec, vector_base< NumericT > &result)
Carries out matrix-vector multiplication.
Definition: matrix_operations.hpp:547
A tag class representing element-wise binary operations (like multiplication) on vectors or matrices...
Definition: forwards.h:86
The main class for representing a statement such as x = inner_prod(y,z); at runtime.
Definition: forwards.h:447
size_type global_work_size(int index=0) const
Returns the global work size at the respective dimension.
Definition: kernel.hpp:759
vcl_size_t internal_size1(matrix_base< NumericT, F > const &mat)
Helper routine for obtaining the internal number of entries per row of a ViennaCL matrix...
Definition: size.hpp:279
Extracts the underlying OpenCL handle from a vector, a matrix, an expression etc. ...
viennacl::backend::mem_handle & handle(T &obj)
Returns the generic memory handle of an object. Non-const version.
Definition: handle.hpp:41
Helper metafunction for checking whether the provided type is viennacl::op_prod (for products/multipl...
Definition: predicate.hpp:418
size_type local_work_size(int index=0) const
Returns the local work size at the respective dimension.
Definition: kernel.hpp:750
std::string op_to_string(op_abs)
Definition: common.hpp:71
cl_uint size
Number of values in the stride.
Definition: kernel.hpp:53
OpenCL kernel file for element-wise matrix operations.
A tag class representing element-wise unary operations (like sin()) on vectors or matrices...
Definition: forwards.h:90
Implementation of the ViennaCL scalar class.
void prod_fast_kernel(const T1 &A, const T2 &B, T3 &C, ScalarType alpha, ScalarType beta, std::string kernel_name)
Definition: matrix_operations.hpp:695
A collection of compile time type deductions.
size_type internal_size2() const
Returns the internal number of columns. Usually required for launching OpenCL kernels only...
Definition: matrix.hpp:649
void ambm(matrix_base< NumericT, F > &mat1, matrix_base< NumericT, F > const &mat2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT, F > const &mat3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
Definition: matrix_operations.hpp:98
Main kernel class for generating OpenCL kernels for operations on/with viennacl::vector<> without inv...
Definition: vector.hpp:600
Simple enable-if variant that uses the SFINAE pattern.
Runtime generation of OpenCL kernels for matrix operations.
void matrix_diag_to_vector(const matrix_base< NumericT, F > &mat, int k, vector_base< NumericT > &vec)
Definition: matrix_operations.hpp:306
cl_uint internal_size
Internal length of the buffer. Might be larger than 'size' due to padding.
Definition: kernel.hpp:55