ViennaCL - The Vienna Computing Library  1.5.2
direct_solve.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_OPENCL_DIRECT_SOLVE_HPP
2 #define VIENNACL_LINALG_OPENCL_DIRECT_SOLVE_HPP
3 
4 /* =========================================================================
5  Copyright (c) 2010-2014, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the PDF manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
25 #include "viennacl/vector.hpp"
26 #include "viennacl/matrix.hpp"
27 #include "viennacl/ocl/kernel.hpp"
28 #include "viennacl/ocl/device.hpp"
29 #include "viennacl/ocl/handle.hpp"
31 
32 namespace viennacl
33 {
34  namespace linalg
35  {
36  namespace opencl
37  {
38  namespace detail
39  {
41  inline cl_uint get_option_for_solver_tag(viennacl::linalg::unit_upper_tag) { return (1 << 0); }
42  inline cl_uint get_option_for_solver_tag(viennacl::linalg::lower_tag) { return (1 << 2); }
43  inline cl_uint get_option_for_solver_tag(viennacl::linalg::unit_lower_tag) { return (1 << 2) | (1 << 0); }
44 
45  template <typename M1, typename M2, typename KernelType>
46  void inplace_solve_impl(M1 const & A, M2 & B, KernelType & k)
47  {
48  viennacl::ocl::enqueue(k(viennacl::traits::opencl_handle(A),
49  cl_uint(viennacl::traits::start1(A)), cl_uint(viennacl::traits::start2(A)),
51  cl_uint(viennacl::traits::size1(A)), cl_uint(viennacl::traits::size2(A)),
53  viennacl::traits::opencl_handle(B),
54  cl_uint(viennacl::traits::start1(B)), cl_uint(viennacl::traits::start2(B)),
56  cl_uint(viennacl::traits::size1(B)), cl_uint(viennacl::traits::size2(B)),
58  )
59  );
60  }
61  }
62 
63 
64  //
65  // Note: By convention, all size checks are performed in the calling frontend. No need to double-check here.
66  //
67 
69 
74  template <typename NumericT, typename F1, typename F2, typename SOLVERTAG>
76  {
77  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
78 
80  KernelClass::init(ctx);
81 
82  std::stringstream ss;
83  ss << SOLVERTAG::name() << "_solve";
84  viennacl::ocl::kernel & k = ctx.get_kernel(KernelClass::program_name(), ss.str());
85 
86  k.global_work_size(0, B.size2() * k.local_work_size());
88  }
89 
95  template <typename NumericT, typename F1, typename F2, typename SOLVERTAG>
98  SOLVERTAG)
99  {
100  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
101 
103  KernelClass::init(ctx);
104 
105  std::stringstream ss;
106  ss << SOLVERTAG::name() << "_trans_solve";
107  viennacl::ocl::kernel & k = ctx.get_kernel(KernelClass::program_name(), ss.str());
108 
109  k.global_work_size(0, proxy_B.lhs().size1() * k.local_work_size());
110  detail::inplace_solve_impl(A, proxy_B.lhs(), k);
111  }
112 
113  //upper triangular solver for transposed lower triangular matrices
119  template <typename NumericT, typename F1, typename F2, typename SOLVERTAG>
122  SOLVERTAG)
123  {
124  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(B).context());
125 
127  KernelClass::init(ctx);
128 
129  std::stringstream ss;
130  ss << "trans_" << SOLVERTAG::name() << "_solve";
131  viennacl::ocl::kernel & k = ctx.get_kernel(KernelClass::program_name(), ss.str());
132 
133  k.global_work_size(0, B.size2() * k.local_work_size());
134  detail::inplace_solve_impl(proxy_A.lhs(), B, k);
135  }
136 
142  template <typename NumericT, typename F1, typename F2, typename SOLVERTAG>
145  SOLVERTAG)
146  {
147  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(proxy_A.lhs()).context());
148 
150  KernelClass::init(ctx);
151 
152  std::stringstream ss;
153  ss << "trans_" << SOLVERTAG::name() << "_trans_solve";
154  viennacl::ocl::kernel & k = ctx.get_kernel(KernelClass::program_name(), ss.str());
155 
156  k.global_work_size(0, proxy_B.lhs().size1() * k.local_work_size());
157  detail::inplace_solve_impl(proxy_A.lhs(), proxy_B.lhs(), k);
158  }
159 
160 
161 
162  //
163  // Solve on vector
164  //
165 
166  template <typename NumericT, typename F, typename SOLVERTAG>
168  vector_base<NumericT> & vec,
169  SOLVERTAG)
170  {
171  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(mat).context());
172 
174  KernelClass::init(ctx);
175 
176  cl_uint options = detail::get_option_for_solver_tag(SOLVERTAG());
177  viennacl::ocl::kernel & k = ctx.get_kernel(KernelClass::program_name(), "triangular_substitute_inplace");
178 
180  viennacl::ocl::enqueue(k(viennacl::traits::opencl_handle(mat),
181  cl_uint(viennacl::traits::start1(mat)), cl_uint(viennacl::traits::start2(mat)),
182  cl_uint(viennacl::traits::stride1(mat)), cl_uint(viennacl::traits::stride2(mat)),
183  cl_uint(viennacl::traits::size1(mat)), cl_uint(viennacl::traits::size2(mat)),
185  viennacl::traits::opencl_handle(vec),
186  cl_uint(viennacl::traits::start(vec)),
187  cl_uint(viennacl::traits::stride(vec)),
188  cl_uint(viennacl::traits::size(vec)),
189  options
190  )
191  );
192  }
193 
199  template <typename NumericT, typename F, typename SOLVERTAG>
201  vector_base<NumericT> & vec,
202  SOLVERTAG)
203  {
204  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(vec).context());
205 
207  KernelClass::init(ctx);
208 
209  cl_uint options = detail::get_option_for_solver_tag(SOLVERTAG()) | 0x02; //add transpose-flag
210  viennacl::ocl::kernel & k = ctx.get_kernel(KernelClass::program_name(), "triangular_substitute_inplace");
211 
213  viennacl::ocl::enqueue(k(viennacl::traits::opencl_handle(proxy.lhs()),
214  cl_uint(viennacl::traits::start1(proxy.lhs())), cl_uint(viennacl::traits::start2(proxy.lhs())),
215  cl_uint(viennacl::traits::stride1(proxy.lhs())), cl_uint(viennacl::traits::stride2(proxy.lhs())),
216  cl_uint(viennacl::traits::size1(proxy.lhs())), cl_uint(viennacl::traits::size2(proxy.lhs())),
217  cl_uint(viennacl::traits::internal_size1(proxy.lhs())), cl_uint(viennacl::traits::internal_size2(proxy.lhs())),
218  viennacl::traits::opencl_handle(vec),
219  cl_uint(viennacl::traits::start(vec)),
220  cl_uint(viennacl::traits::stride(vec)),
221  cl_uint(viennacl::traits::size(vec)),
222  options
223  )
224  );
225  }
226 
227 
228  }
229  }
230 }
231 
232 #endif
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context.
Definition: context.hpp:470
result_of::size_type< matrix_base< NumericT, F > >::type stride2(matrix_base< NumericT, F > const &s)
Definition: stride.hpp:68
Represents an OpenCL device within ViennaCL.
Represents an OpenCL kernel within ViennaCL.
Definition: kernel.hpp:59
Implementation of the dense matrix class.
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
Definition: size.hpp:216
A tag class representing a lower triangular matrix.
Definition: forwards.h:703
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:51
A dense matrix class.
Definition: forwards.h:290
Expression template class for representing a tree of expressions which ultimately result in a matrix...
Definition: forwards.h:283
result_of::size_type< viennacl::vector_base< T > >::type stride(viennacl::vector_base< T > const &s)
Definition: stride.hpp:46
result_of::size_type< T >::type start1(T const &obj)
Definition: start.hpp:64
cl_uint get_option_for_solver_tag(viennacl::linalg::upper_tag)
Definition: direct_solve.hpp:40
OpenCL kernel file for dense matrix solves with multiple right hand side (BLAS level 3) ...
void inplace_solve_impl(M1 const &A, M2 &B, KernelType &k)
Definition: direct_solve.hpp:46
result_of::size_type< MatrixType >::type size2(MatrixType const &mat)
Generic routine for obtaining the number of columns of a matrix (ViennaCL, uBLAS, etc...
Definition: size.hpp:245
size_type size2() const
Returns the number of columns.
Definition: matrix.hpp:627
result_of::size_type< matrix_base< NumericT, F > >::type stride1(matrix_base< NumericT, F > const &s)
Definition: stride.hpp:57
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:144
result_of::size_type< T >::type start2(T const &obj)
Definition: start.hpp:83
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Definition: enqueue.hpp:48
A tag class representing an upper triangular matrix.
Definition: forwards.h:708
Implementation of a smart-pointer-like class for handling OpenCL handles.
result_of::size_type< T >::type start(T const &obj)
Definition: start.hpp:43
Common base class for dense vectors, vector ranges, and vector slices.
Definition: forwards.h:205
Main kernel class for the generation of matrix solve kernels.
Definition: matrix_solve.hpp:134
vcl_size_t internal_size2(matrix_base< NumericT, F > const &mat)
Helper routine for obtaining the internal number of entries per column of a ViennaCL matrix...
Definition: size.hpp:287
void inplace_solve(const matrix_base< NumericT, F1 > &A, matrix_base< NumericT, F2 > &B, SOLVERTAG)
Direct inplace solver for dense triangular systems. Matlab notation: A \ B.
Definition: direct_solve.hpp:75
Main kernel class for generating OpenCL kernels for operations on/with dense matrix objects of type v...
Definition: matrix.hpp:877
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object.
Definition: context.hpp:41
Representation of an OpenCL kernel in ViennaCL.
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
A tag class representing a lower triangular matrix with unit diagonal.
Definition: forwards.h:713
A tag class representing transposed matrices.
Definition: forwards.h:165
size_type global_work_size(int index=0) const
Returns the global work size at the respective dimension.
Definition: kernel.hpp:759
vcl_size_t internal_size1(matrix_base< NumericT, F > const &mat)
Helper routine for obtaining the internal number of entries per row of a ViennaCL matrix...
Definition: size.hpp:279
size_type local_work_size(int index=0) const
Returns the local work size at the respective dimension.
Definition: kernel.hpp:750
A tag class representing an upper triangular matrix with unit diagonal.
Definition: forwards.h:718