ViennaCL - The Vienna Computing Library 1.5.2
sparse_matrix_operations.hpp
#ifndef VIENNACL_LINALG_OPENCL_SPARSE_MATRIX_OPERATIONS_HPP_
#define VIENNACL_LINALG_OPENCL_SPARSE_MATRIX_OPERATIONS_HPP_

/* =========================================================================
   Copyright (c) 2010-2014, Institute for Microelectronics,
                            Institute for Analysis and Scientific Computing,
                            TU Wien.
   Portions of this software are copyright by UChicago Argonne, LLC.

                            -----------------
                  ViennaCL - The Vienna Computing Library
                            -----------------

   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at

   (A list of authors and contributors can be found in the PDF manual)

   License:         MIT (X11), see file LICENSE in the base directory
============================================================================= */

#include "viennacl/forwards.h"
#include "viennacl/ocl/device.hpp"
#include "viennacl/ocl/handle.hpp"
#include "viennacl/ocl/kernel.hpp"
#include "viennacl/scalar.hpp"
#include "viennacl/vector.hpp"
#include "viennacl/tools/tools.hpp"

namespace viennacl
{
  namespace linalg
  {
    namespace opencl
    {

      //
      // Compressed matrix
      //

      namespace detail
      {
        template<typename SCALARTYPE, unsigned int MAT_ALIGNMENT>
        void row_info(compressed_matrix<SCALARTYPE, MAT_ALIGNMENT> const & mat,
                      vector_base<SCALARTYPE> & vec,
                      viennacl::linalg::detail::row_info_types info_selector)
        {
          viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(mat).context());

          viennacl::ocl::enqueue(row_info_kernel(mat.handle1().opencl_handle(), mat.handle2().opencl_handle(), mat.handle().opencl_handle(),
                                                 viennacl::traits::opencl_handle(vec),
                                                 cl_uint(mat.size1()),
                                                 cl_uint(info_selector)
                                                )
                                );
        }
      }

      template<class TYPE, unsigned int ALIGNMENT>
                     const viennacl::vector_base<TYPE> & vec,
      {
        viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(mat).context());
        std::stringstream ss;
        ss << "vec_mul";
        if (ALIGNMENT == 4)
          ss << "4";
        if (ALIGNMENT == 8)
          ss << "8";

        viennacl::ocl::packed_cl_uint layout_vec;
        layout_vec.start = cl_uint(viennacl::traits::start(vec));
        layout_vec.stride = cl_uint(viennacl::traits::stride(vec));
        layout_vec.size = cl_uint(viennacl::traits::size(vec));
        layout_vec.internal_size = cl_uint(viennacl::traits::internal_size(vec));

        viennacl::ocl::packed_cl_uint layout_result;
        layout_result.start = cl_uint(viennacl::traits::start(result));
        layout_result.stride = cl_uint(viennacl::traits::stride(result));
        layout_result.size = cl_uint(viennacl::traits::size(result));
        layout_result.internal_size = cl_uint(viennacl::traits::internal_size(result));

        viennacl::ocl::enqueue(k(mat.handle1().opencl_handle(), mat.handle2().opencl_handle(), mat.handle().opencl_handle(),
                                 vec, layout_vec,
                                 result, layout_result
                                ));
      }
112 
121  template< typename TYPE, unsigned int ALIGNMENT, typename F1, typename F2>
123  const viennacl::matrix_base<TYPE, F1> & d_mat,
125 
126  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(sp_mat).context());
130 
131  viennacl::ocl::enqueue(k(sp_mat.handle1().opencl_handle(), sp_mat.handle2().opencl_handle(), sp_mat.handle().opencl_handle(),
132  viennacl::traits::opencl_handle(d_mat),
133  cl_uint(viennacl::traits::start1(d_mat)), cl_uint(viennacl::traits::start2(d_mat)),
134  cl_uint(viennacl::traits::stride1(d_mat)), cl_uint(viennacl::traits::stride2(d_mat)),
135  cl_uint(viennacl::traits::size1(d_mat)), cl_uint(viennacl::traits::size2(d_mat)),
137  viennacl::traits::opencl_handle(result),
138  cl_uint(viennacl::traits::start1(result)), cl_uint(viennacl::traits::start2(result)),
139  cl_uint(viennacl::traits::stride1(result)), cl_uint(viennacl::traits::stride2(result)),
140  cl_uint(viennacl::traits::size1(result)), cl_uint(viennacl::traits::size2(result)),
141  cl_uint(viennacl::traits::internal_size1(result)), cl_uint(viennacl::traits::internal_size2(result)) ));
142  }
143 
153  template< typename TYPE, unsigned int ALIGNMENT, typename F1, typename F2>
157  viennacl::op_trans > & d_mat,
159 
160  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(sp_mat).context());
164 
165  viennacl::ocl::enqueue(k(sp_mat.handle1().opencl_handle(), sp_mat.handle2().opencl_handle(), sp_mat.handle().opencl_handle(),
166  viennacl::traits::opencl_handle(d_mat.lhs()),
167  cl_uint(viennacl::traits::start1(d_mat.lhs())), cl_uint(viennacl::traits::start2(d_mat.lhs())),
168  cl_uint(viennacl::traits::stride1(d_mat.lhs())), cl_uint(viennacl::traits::stride2(d_mat.lhs())),
169  cl_uint(viennacl::traits::size1(d_mat.lhs())), cl_uint(viennacl::traits::size2(d_mat.lhs())),
170  cl_uint(viennacl::traits::internal_size1(d_mat.lhs())), cl_uint(viennacl::traits::internal_size2(d_mat.lhs())),
171  viennacl::traits::opencl_handle(result),
172  cl_uint(viennacl::traits::start1(result)), cl_uint(viennacl::traits::start2(result)),
173  cl_uint(viennacl::traits::stride1(result)), cl_uint(viennacl::traits::stride2(result)),
174  cl_uint(viennacl::traits::size1(result)), cl_uint(viennacl::traits::size2(result)),
175  cl_uint(viennacl::traits::internal_size1(result)), cl_uint(viennacl::traits::internal_size2(result)) ) );
176  }
177 


      // triangular solvers

      template<typename SCALARTYPE, unsigned int MAT_ALIGNMENT>
      {
        viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(L).context());

        k.local_work_size(0, 128);
        viennacl::ocl::enqueue(k(L.handle1().opencl_handle(), L.handle2().opencl_handle(), L.handle().opencl_handle(),
                                 viennacl::traits::opencl_handle(vec),
                                 cl_uint(L.size1())
                                )
                              );
      }

      template<typename SCALARTYPE, unsigned int MAT_ALIGNMENT>
      {
        viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(L).context());


        k.local_work_size(0, 128);
        viennacl::ocl::enqueue(k(L.handle1().opencl_handle(), L.handle2().opencl_handle(), L.handle().opencl_handle(),
                                 viennacl::traits::opencl_handle(vec),
                                 cl_uint(L.size1())
                                )
                              );
      }


      template<typename SCALARTYPE, unsigned int MAT_ALIGNMENT>
      {
        viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(U).context());

        k.local_work_size(0, 128);
        viennacl::ocl::enqueue(k(U.handle1().opencl_handle(), U.handle2().opencl_handle(), U.handle().opencl_handle(),
                                 viennacl::traits::opencl_handle(vec),
                                 cl_uint(U.size1())
                                )
                              );
      }

      template<typename SCALARTYPE, unsigned int MAT_ALIGNMENT>
      {
        viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(U).context());


        k.local_work_size(0, 128);
        viennacl::ocl::enqueue(k(U.handle1().opencl_handle(), U.handle2().opencl_handle(), U.handle().opencl_handle(),
                                 viennacl::traits::opencl_handle(vec),
                                 cl_uint(U.size1())
                                )
                              );
      }





      // transposed triangular solvers

      namespace detail
      {
        //
        // block solves
        //
        template<typename ScalarType, unsigned int MAT_ALIGNMENT>
        void block_inplace_solve(const matrix_expression<const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                                         const compressed_matrix<ScalarType, MAT_ALIGNMENT>,
                                                         op_trans> & L,
                                 viennacl::backend::mem_handle const & block_indices, vcl_size_t num_blocks,
                                 vector_base<ScalarType> const & /* L_diagonal */,  //ignored
                                 vector_base<ScalarType> & vec,
                                 viennacl::linalg::unit_lower_tag)
        {
          viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(L.lhs()).context());
          block_solve_kernel.global_work_size(0, num_blocks * block_solve_kernel.local_work_size(0));

          viennacl::ocl::enqueue(block_solve_kernel(L.lhs().handle1().opencl_handle(),
                                                    L.lhs().handle2().opencl_handle(),
                                                    L.lhs().handle().opencl_handle(),
                                                    block_indices.opencl_handle(),
                                                    vec,
                                                    static_cast<cl_uint>(vec.size())));
        }


        template<typename ScalarType, unsigned int MAT_ALIGNMENT>
                                 op_trans> & U,
                                 viennacl::backend::mem_handle const & block_indices, vcl_size_t num_blocks,
                                 vector_base<ScalarType> const & U_diagonal,
        {
          viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(U.lhs()).context());
          block_solve_kernel.global_work_size(0, num_blocks * block_solve_kernel.local_work_size(0));

          viennacl::ocl::enqueue(block_solve_kernel(U.lhs().handle1().opencl_handle(),
                                                    U.lhs().handle2().opencl_handle(),
                                                    U.lhs().handle().opencl_handle(),
                                                    U_diagonal,
                                                    block_indices.opencl_handle(),
                                                    vec,
                                                    static_cast<cl_uint>(vec.size())));
        }


      }


      template<typename SCALARTYPE, unsigned int MAT_ALIGNMENT>
                         op_trans> const & proxy_L,
      {
        viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(proxy_L.lhs()).context());

        k.local_work_size(0, 128);
        viennacl::ocl::enqueue(k(proxy_L.lhs().handle1().opencl_handle(), proxy_L.lhs().handle2().opencl_handle(), proxy_L.lhs().handle().opencl_handle(),
                                 viennacl::traits::opencl_handle(vec),
                                 cl_uint(proxy_L.lhs().size1())
                                )
                              );
      }


      template<typename SCALARTYPE, unsigned int MAT_ALIGNMENT>
                         op_trans> const & proxy_L,
      {
        viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(proxy_L.lhs()).context());

        viennacl::vector<SCALARTYPE> diagonal(vec.size());


        k.local_work_size(0, 128);
        k.global_work_size(0, k.local_work_size());
        viennacl::ocl::enqueue(k(proxy_L.lhs().handle1().opencl_handle(), proxy_L.lhs().handle2().opencl_handle(), proxy_L.lhs().handle().opencl_handle(),
                                 viennacl::traits::opencl_handle(diagonal),
                                 viennacl::traits::opencl_handle(vec),
                                 cl_uint(proxy_L.lhs().size1())
                                )
                              );
      }

      template<typename SCALARTYPE, unsigned int MAT_ALIGNMENT>
                         op_trans> const & proxy_U,
      {
        viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(proxy_U.lhs()).context());

        k.local_work_size(0, 128);
        viennacl::ocl::enqueue(k(proxy_U.lhs().handle1().opencl_handle(), proxy_U.lhs().handle2().opencl_handle(), proxy_U.lhs().handle().opencl_handle(),
                                 viennacl::traits::opencl_handle(vec),
                                 cl_uint(proxy_U.lhs().size1())
                                )
                              );
      }


      template<typename SCALARTYPE, unsigned int MAT_ALIGNMENT>
                         op_trans> const & proxy_U,
      {
        viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(proxy_U.lhs()).context());

        viennacl::vector<SCALARTYPE> diagonal(vec.size());


        k.local_work_size(0, 128);
        k.global_work_size(0, k.local_work_size());
        viennacl::ocl::enqueue(k(proxy_U.lhs().handle1().opencl_handle(), proxy_U.lhs().handle2().opencl_handle(), proxy_U.lhs().handle().opencl_handle(),
                                 viennacl::traits::opencl_handle(diagonal),
                                 viennacl::traits::opencl_handle(vec),
                                 cl_uint(proxy_U.lhs().size1())
                                )
                              );
      }


      //
      // Compressed Compressed matrix
      //

      template<class TYPE>
                     const viennacl::vector_base<TYPE> & vec,
      {
        viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(mat).context());

        result.clear();

        viennacl::ocl::packed_cl_uint layout_vec;
        layout_vec.start = cl_uint(viennacl::traits::start(vec));
        layout_vec.stride = cl_uint(viennacl::traits::stride(vec));
        layout_vec.size = cl_uint(viennacl::traits::size(vec));
        layout_vec.internal_size = cl_uint(viennacl::traits::internal_size(vec));

        viennacl::ocl::packed_cl_uint layout_result;
        layout_result.start = cl_uint(viennacl::traits::start(result));
        layout_result.stride = cl_uint(viennacl::traits::stride(result));
        layout_result.size = cl_uint(viennacl::traits::size(result));
        layout_result.internal_size = cl_uint(viennacl::traits::internal_size(result));

        viennacl::ocl::enqueue(k(mat.handle1().opencl_handle(), mat.handle3().opencl_handle(), mat.handle2().opencl_handle(), mat.handle().opencl_handle(), cl_uint(mat.nnz1()),
                                 vec, layout_vec,
                                 result, layout_result
                                ));
      }


      //
      // Coordinate matrix
      //

      namespace detail
      {
        template<typename SCALARTYPE, unsigned int MAT_ALIGNMENT>
        {
          viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(mat).context());
          unsigned int thread_num = 256; //k.local_work_size(0);

          row_info_kernel.local_work_size(0, thread_num);

          row_info_kernel.global_work_size(0, 64 * thread_num); //64 work groups are hard-coded for now. Gives reasonable performance in most cases
          viennacl::ocl::enqueue(row_info_kernel(mat.handle12().opencl_handle(), mat.handle().opencl_handle(), mat.handle3().opencl_handle(),
                                                 viennacl::traits::opencl_handle(vec),
                                                 cl_uint(info_selector),
                                                 viennacl::ocl::local_mem(sizeof(cl_uint)*thread_num),
                                                 viennacl::ocl::local_mem(sizeof(SCALARTYPE)*thread_num)) );
        }
      }

      template<class SCALARTYPE, unsigned int ALIGNMENT>
      {
        viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(mat).context());

        result.clear();

        viennacl::ocl::packed_cl_uint layout_vec;
        layout_vec.start = cl_uint(viennacl::traits::start(vec));
        layout_vec.stride = cl_uint(viennacl::traits::stride(vec));
        layout_vec.size = cl_uint(viennacl::traits::size(vec));
        layout_vec.internal_size = cl_uint(viennacl::traits::internal_size(vec));

        viennacl::ocl::packed_cl_uint layout_result;
        layout_result.start = cl_uint(viennacl::traits::start(result));
        layout_result.stride = cl_uint(viennacl::traits::stride(result));
        layout_result.size = cl_uint(viennacl::traits::size(result));
        layout_result.internal_size = cl_uint(viennacl::traits::internal_size(result));

        //std::cout << "prod(coordinate_matrix" << ALIGNMENT << ", vector) called with internal_nnz=" << mat.internal_nnz() << std::endl;

        unsigned int thread_num = 256; //k.local_work_size(0);

        k.local_work_size(0, thread_num);

        k.global_work_size(0, 64 * thread_num); //64 work groups are hard-coded for now. Gives reasonable performance in most cases
        //k.global_work_size(0, thread_num); //Only one work group
        viennacl::ocl::enqueue(k(mat.handle12().opencl_handle(), mat.handle().opencl_handle(), mat.handle3().opencl_handle(),
                                 viennacl::traits::opencl_handle(vec),
                                 layout_vec,
                                 viennacl::traits::opencl_handle(result),
                                 layout_result,
                                 viennacl::ocl::local_mem(sizeof(cl_uint)*thread_num),
                                 viennacl::ocl::local_mem(sizeof(SCALARTYPE)*thread_num)) );

      }


      template<typename NumericT, unsigned int ALIGNMENT, typename F1, typename F2>
      {
        viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(mat).context());


        result.clear();

        unsigned int thread_num = 256; //k.local_work_size(0);
        k.local_work_size(0, thread_num);
        k.global_work_size(0, 64 * thread_num); //64 work groups are hard-coded for now. Gives reasonable performance in most cases

        viennacl::ocl::enqueue(k(mat.handle12().opencl_handle(), mat.handle().opencl_handle(), mat.handle3().opencl_handle(),
                                 viennacl::traits::opencl_handle(d_mat),
                                 cl_uint(viennacl::traits::start1(d_mat)), cl_uint(viennacl::traits::start2(d_mat)),
                                 cl_uint(viennacl::traits::stride1(d_mat)), cl_uint(viennacl::traits::stride2(d_mat)),
                                 cl_uint(viennacl::traits::size1(d_mat)), cl_uint(viennacl::traits::size2(d_mat)),
                                 viennacl::traits::opencl_handle(result),
                                 cl_uint(viennacl::traits::start1(result)), cl_uint(viennacl::traits::start2(result)),
                                 cl_uint(viennacl::traits::stride1(result)), cl_uint(viennacl::traits::stride2(result)),
                                 cl_uint(viennacl::traits::size1(result)), cl_uint(viennacl::traits::size2(result)),
                                 cl_uint(viennacl::traits::internal_size1(result)), cl_uint(viennacl::traits::internal_size2(result)),
                                 viennacl::ocl::local_mem(sizeof(cl_uint)*k.local_work_size(0)),
                                 viennacl::ocl::local_mem(sizeof(NumericT)*k.local_work_size(0))) );

      }

      template<typename NumericT, unsigned int ALIGNMENT, typename F1, typename F2>
                                                viennacl::op_trans > & d_mat,
      {
        viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(mat).context());


        result.clear();

        unsigned int thread_num = 256; //k.local_work_size(0);
        k.local_work_size(0, thread_num);
        k.global_work_size(0, 64 * thread_num); //64 work groups are hard-coded for now. Gives reasonable performance in most cases

        viennacl::ocl::enqueue(k(mat.handle12().opencl_handle(), mat.handle().opencl_handle(), mat.handle3().opencl_handle(),
                                 viennacl::traits::opencl_handle(d_mat),
                                 cl_uint(viennacl::traits::start1(d_mat.lhs())), cl_uint(viennacl::traits::start2(d_mat.lhs())),
                                 cl_uint(viennacl::traits::stride1(d_mat.lhs())), cl_uint(viennacl::traits::stride2(d_mat.lhs())),
                                 cl_uint(viennacl::traits::size1(d_mat.lhs())), cl_uint(viennacl::traits::size2(d_mat.lhs())),
                                 cl_uint(viennacl::traits::internal_size1(d_mat.lhs())), cl_uint(viennacl::traits::internal_size2(d_mat.lhs())),
                                 viennacl::traits::opencl_handle(result),
                                 cl_uint(viennacl::traits::start1(result)), cl_uint(viennacl::traits::start2(result)),
                                 cl_uint(viennacl::traits::stride1(result)), cl_uint(viennacl::traits::stride2(result)),
                                 cl_uint(viennacl::traits::size1(result)), cl_uint(viennacl::traits::size2(result)),
                                 cl_uint(viennacl::traits::internal_size1(result)), cl_uint(viennacl::traits::internal_size2(result)),
                                 viennacl::ocl::local_mem(sizeof(cl_uint)*k.local_work_size(0)),
                                 viennacl::ocl::local_mem(sizeof(NumericT)*k.local_work_size(0))) );

      }


      //
      // ELL Matrix
      //

      template<class TYPE, unsigned int ALIGNMENT>
                     const viennacl::vector_base<TYPE> & vec,
      {
        assert(mat.size1() == result.size());
        assert(mat.size2() == vec.size());

        viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(mat).context());
        result.clear();

        viennacl::ocl::packed_cl_uint layout_vec;
        layout_vec.start = cl_uint(viennacl::traits::start(vec));
        layout_vec.stride = cl_uint(viennacl::traits::stride(vec));
        layout_vec.size = cl_uint(viennacl::traits::size(vec));
        layout_vec.internal_size = cl_uint(viennacl::traits::internal_size(vec));

        viennacl::ocl::packed_cl_uint layout_result;
        layout_result.start = cl_uint(viennacl::traits::start(result));
        layout_result.stride = cl_uint(viennacl::traits::stride(result));
        layout_result.size = cl_uint(viennacl::traits::size(result));
        layout_result.internal_size = cl_uint(viennacl::traits::internal_size(result));

        std::stringstream ss;
        ss << "vec_mul_" << 1;//(ALIGNMENT != 1?4:1);

        unsigned int thread_num = 128;
        unsigned int group_num = 256;

        k.local_work_size(0, thread_num);
        k.global_work_size(0, thread_num * group_num);

        viennacl::ocl::enqueue(k(mat.handle2().opencl_handle(),
                                 mat.handle().opencl_handle(),
                                 viennacl::traits::opencl_handle(vec),
                                 layout_vec,
                                 viennacl::traits::opencl_handle(result),
                                 layout_result,
                                 cl_uint(mat.size1()),
                                 cl_uint(mat.size2()),
                                 cl_uint(mat.internal_size1()),
                                 cl_uint(mat.maxnnz()),
                                 cl_uint(mat.internal_maxnnz())
                                )
                              );


      }

      template<class ScalarType, unsigned int ALIGNMENT, class NumericT, typename F1, typename F2>

        viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(sp_mat).context());

        //unsigned int thread_num = 128;
        //unsigned int group_num = 256;
        //
        //k.local_work_size(0, thread_num);
        //k.global_work_size(0, thread_num * group_num);

        viennacl::ocl::enqueue(k(sp_mat.handle2().opencl_handle(), sp_mat.handle().opencl_handle(),
                                 cl_uint(sp_mat.size1()),
                                 cl_uint(sp_mat.size2()),
                                 cl_uint(sp_mat.internal_size1()),
                                 cl_uint(sp_mat.maxnnz()),
                                 cl_uint(sp_mat.internal_maxnnz()),
                                 viennacl::traits::opencl_handle(d_mat),
                                 cl_uint(viennacl::traits::start1(d_mat)), cl_uint(viennacl::traits::start2(d_mat)),
                                 cl_uint(viennacl::traits::stride1(d_mat)), cl_uint(viennacl::traits::stride2(d_mat)),
                                 cl_uint(viennacl::traits::size1(d_mat)), cl_uint(viennacl::traits::size2(d_mat)),
                                 viennacl::traits::opencl_handle(result),
                                 cl_uint(viennacl::traits::start1(result)), cl_uint(viennacl::traits::start2(result)),
                                 cl_uint(viennacl::traits::stride1(result)), cl_uint(viennacl::traits::stride2(result)),
                                 cl_uint(viennacl::traits::size1(result)), cl_uint(viennacl::traits::size2(result)),
                                 cl_uint(viennacl::traits::internal_size1(result)), cl_uint(viennacl::traits::internal_size2(result))
                                )
                              );
      }

      template<class ScalarType, unsigned int ALIGNMENT, class NumericT, typename F1, typename F2>
                                                viennacl::op_trans > & d_mat,

        viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(sp_mat).context());

        //unsigned int thread_num = 128;
        //unsigned int group_num = 256;
        //
        //k.local_work_size(0, thread_num);
        //k.global_work_size(0, thread_num * group_num);

        viennacl::ocl::enqueue(k(sp_mat.handle2().opencl_handle(), sp_mat.handle().opencl_handle(),
                                 cl_uint(sp_mat.size1()),
                                 cl_uint(sp_mat.size2()),
                                 cl_uint(sp_mat.internal_size1()),
                                 cl_uint(sp_mat.maxnnz()),
                                 cl_uint(sp_mat.internal_maxnnz()),
                                 viennacl::traits::opencl_handle(d_mat.lhs()),
                                 cl_uint(viennacl::traits::start1(d_mat.lhs())), cl_uint(viennacl::traits::start2(d_mat.lhs())),
                                 cl_uint(viennacl::traits::stride1(d_mat.lhs())), cl_uint(viennacl::traits::stride2(d_mat.lhs())),
                                 cl_uint(viennacl::traits::size1(d_mat.lhs())), cl_uint(viennacl::traits::size2(d_mat.lhs())),
                                 cl_uint(viennacl::traits::internal_size1(d_mat.lhs())), cl_uint(viennacl::traits::internal_size2(d_mat.lhs())),
                                 viennacl::traits::opencl_handle(result),
                                 cl_uint(viennacl::traits::start1(result)), cl_uint(viennacl::traits::start2(result)),
                                 cl_uint(viennacl::traits::stride1(result)), cl_uint(viennacl::traits::stride2(result)),
                                 cl_uint(viennacl::traits::size1(result)), cl_uint(viennacl::traits::size2(result)),
                                 cl_uint(viennacl::traits::internal_size1(result)), cl_uint(viennacl::traits::internal_size2(result))
                                )
                              );
      }

      //
      // Hybrid Matrix
      //

      template<class TYPE, unsigned int ALIGNMENT>
                     const viennacl::vector_base<TYPE>& vec,
      {
        assert(mat.size1() == result.size());
        assert(mat.size2() == vec.size());

        viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(mat).context());

        viennacl::ocl::packed_cl_uint layout_vec;
        layout_vec.start = cl_uint(viennacl::traits::start(vec));
        layout_vec.stride = cl_uint(viennacl::traits::stride(vec));
        layout_vec.size = cl_uint(viennacl::traits::size(vec));
        layout_vec.internal_size = cl_uint(viennacl::traits::internal_size(vec));

        viennacl::ocl::packed_cl_uint layout_result;
        layout_result.start = cl_uint(viennacl::traits::start(result));
        layout_result.stride = cl_uint(viennacl::traits::stride(result));
        layout_result.size = cl_uint(viennacl::traits::size(result));
        layout_result.internal_size = cl_uint(viennacl::traits::internal_size(result));


        unsigned int thread_num = 256;
        unsigned int group_num = 32;

        k.local_work_size(0, thread_num);
        k.global_work_size(0, thread_num * group_num);

        viennacl::ocl::enqueue(k(mat.handle2().opencl_handle(),
                                 mat.handle().opencl_handle(),
                                 mat.handle3().opencl_handle(),
                                 mat.handle4().opencl_handle(),
                                 mat.handle5().opencl_handle(),
                                 viennacl::traits::opencl_handle(vec),
                                 layout_vec,
                                 viennacl::traits::opencl_handle(result),
                                 layout_result,
                                 cl_uint(mat.size1()),
                                 cl_uint(mat.internal_size1()),
                                 cl_uint(mat.ell_nnz()),
                                 cl_uint(mat.internal_ellnnz())
                                )
                              );
      }

      template<typename NumericT, unsigned int ALIGNMENT, typename F1, typename F2>
      {
        viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(mat).context());

        unsigned int thread_num = 256;
        unsigned int group_num = 32;

        k.local_work_size(0, thread_num);
        k.global_work_size(0, thread_num * group_num);

        viennacl::ocl::enqueue(k(mat.handle2().opencl_handle(),
                                 mat.handle().opencl_handle(),
                                 mat.handle3().opencl_handle(),
                                 mat.handle4().opencl_handle(),
                                 mat.handle5().opencl_handle(),
                                 cl_uint(mat.size1()),
                                 cl_uint(mat.internal_size1()),
                                 cl_uint(mat.ell_nnz()),
                                 cl_uint(mat.internal_ellnnz()),
                                 viennacl::traits::opencl_handle(d_mat),
                                 cl_uint(viennacl::traits::start1(d_mat)), cl_uint(viennacl::traits::start2(d_mat)),
                                 cl_uint(viennacl::traits::stride1(d_mat)), cl_uint(viennacl::traits::stride2(d_mat)),
                                 cl_uint(viennacl::traits::size1(d_mat)), cl_uint(viennacl::traits::size2(d_mat)),
                                 viennacl::traits::opencl_handle(result),
                                 cl_uint(viennacl::traits::start1(result)), cl_uint(viennacl::traits::start2(result)),
                                 cl_uint(viennacl::traits::stride1(result)), cl_uint(viennacl::traits::stride2(result)),
                                 cl_uint(viennacl::traits::size1(result)), cl_uint(viennacl::traits::size2(result)),
                                 cl_uint(viennacl::traits::internal_size1(result)), cl_uint(viennacl::traits::internal_size2(result))
                                )
                              );
      }

      template<typename NumericT, unsigned int ALIGNMENT, typename F1, typename F2>
                                                viennacl::op_trans > & d_mat,
      {
        viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(mat).context());

        unsigned int thread_num = 256;
        unsigned int group_num = 32;

        k.local_work_size(0, thread_num);
        k.global_work_size(0, thread_num * group_num);

        viennacl::ocl::enqueue(k(mat.handle2().opencl_handle(),
                                 mat.handle().opencl_handle(),
                                 mat.handle3().opencl_handle(),
                                 mat.handle4().opencl_handle(),
                                 mat.handle5().opencl_handle(),
                                 cl_uint(mat.size1()),
                                 cl_uint(mat.internal_size1()),
                                 cl_uint(mat.ell_nnz()),
                                 cl_uint(mat.internal_ellnnz()),
                                 viennacl::traits::opencl_handle(d_mat.lhs()),
                                 cl_uint(viennacl::traits::start1(d_mat.lhs())), cl_uint(viennacl::traits::start2(d_mat.lhs())),
                                 cl_uint(viennacl::traits::stride1(d_mat.lhs())), cl_uint(viennacl::traits::stride2(d_mat.lhs())),
                                 cl_uint(viennacl::traits::size1(d_mat.lhs())), cl_uint(viennacl::traits::size2(d_mat.lhs())),
                                 cl_uint(viennacl::traits::internal_size1(d_mat.lhs())), cl_uint(viennacl::traits::internal_size2(d_mat.lhs())),
                                 viennacl::traits::opencl_handle(result),
                                 cl_uint(viennacl::traits::start1(result)), cl_uint(viennacl::traits::start2(result)),
                                 cl_uint(viennacl::traits::stride1(result)), cl_uint(viennacl::traits::stride2(result)),
                                 cl_uint(viennacl::traits::size1(result)), cl_uint(viennacl::traits::size2(result)),
                                 cl_uint(viennacl::traits::internal_size1(result)), cl_uint(viennacl::traits::internal_size2(result))
                                )
                              );
      }


    } // namespace opencl
  } //namespace linalg
} //namespace viennacl


#endif
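
Usage note: a minimal sketch of how the backend routines above are typically reached, assuming the standard ViennaCL 1.5.x public interface (viennacl::compressed_matrix, viennacl::vector, viennacl::copy, viennacl::linalg::prod). Application code does not call viennacl::linalg::opencl functions directly; the generic interface dispatches to them when the OpenCL backend is active. Sizes and values below are illustrative only, and the triangular-solve overloads are reached analogously through viennacl::linalg::inplace_solve().

// Minimal usage sketch (illustrative; not part of sparse_matrix_operations.hpp).
#include <vector>
#include <map>

#include "viennacl/compressed_matrix.hpp"
#include "viennacl/vector.hpp"
#include "viennacl/linalg/prod.hpp"

int main()
{
  // Host data: a small sparse matrix stored as row -> (column -> value).
  std::vector<std::map<unsigned int, float> > host_A(3);
  host_A[0][0] = 2.0f; host_A[0][2] = 1.0f;
  host_A[1][1] = 3.0f;
  host_A[2][0] = 1.0f; host_A[2][2] = 4.0f;

  std::vector<float> host_x(3, 1.0f);

  // Device objects: CSR matrix and vectors.
  viennacl::compressed_matrix<float> A(3, 3);
  viennacl::vector<float> x(3), y(3);

  viennacl::copy(host_A, A);   // transfer sparse host data to the device
  viennacl::copy(host_x, x);   // transfer the dense vector

  // Sparse matrix-vector product; with an OpenCL context this is carried out
  // by the compressed_matrix kernels driven by the routines in this header.
  y = viennacl::linalg::prod(A, x);

  return 0;
}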