ViennaCL - The Vienna Computing Library  1.5.2
block_ilu.hpp
1 #ifndef VIENNACL_LINALG_DETAIL_BLOCK_ILU_HPP_
2 #define VIENNACL_LINALG_DETAIL_BLOCK_ILU_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2014, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the PDF manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
21 /** @file viennacl/linalg/detail/ilu/block_ilu.hpp
22     @brief Implementations of incomplete block factorization preconditioners
23 */
24 
25 #include <vector>
26 #include <cmath>
27 #include "viennacl/forwards.h"
28 #include "viennacl/tools/tools.hpp"
29 #include "viennacl/linalg/detail/ilu/common.hpp"
30 #include "viennacl/linalg/detail/ilu/ilu0.hpp"
31 #include "viennacl/linalg/detail/ilu/ilut.hpp"
32 
33 #include <map>
34 
35 namespace viennacl
36 {
37  namespace linalg
38  {
39  namespace detail
40  {
41  /** @brief Helper range class for representing a subvector of a larger buffer. */
42  template <typename VectorType, typename ValueType, typename SizeType = vcl_size_t>
43  class ilu_vector_range
44  {
45  public:
46  //typedef typename VectorType::value_type value_type;
47  //typedef typename VectorType::size_type size_type;
48 
49  ilu_vector_range(VectorType & v,
50  SizeType start_index,
51  SizeType vec_size
52  ) : vec_(v), start_(start_index), size_(vec_size) {}
53 
54  ValueType & operator()(SizeType index)
55  {
56  assert(index < size_ && bool("Index out of bounds!"));
57  return vec_[start_ + index];
58  }
59 
60  ValueType & operator[](SizeType index)
61  {
62  assert(index < size_ && bool("Index out of bounds!"));
63  return vec_[start_ + index];
64  }
65 
66  SizeType size() const { return size_; }
67 
68  private:
69  VectorType & vec_;
70  SizeType start_;
71  SizeType size_;
72  };
73 
74  /** @brief Extracts a diagonal block from a larger system matrix
75   *
76   * @param A                 The full system matrix
77   * @param diagonal_block_A  The output matrix, to which the diagonal block is written
78   * @param start_index       First row- and column-index of the block
79   * @param stop_index        First row- and column-index beyond the block
80   */
81  template <typename ScalarType>
82  void extract_block_matrix(viennacl::compressed_matrix<ScalarType> const & A,
83                            viennacl::compressed_matrix<ScalarType> & diagonal_block_A,
84  vcl_size_t start_index,
85  vcl_size_t stop_index
86  )
87  {
88 
89  assert( (A.handle1().get_active_handle_id() == viennacl::MAIN_MEMORY) && bool("System matrix must reside in main memory for ILU0") );
90  assert( (A.handle2().get_active_handle_id() == viennacl::MAIN_MEMORY) && bool("System matrix must reside in main memory for ILU0") );
91  assert( (A.handle().get_active_handle_id() == viennacl::MAIN_MEMORY) && bool("System matrix must reside in main memory for ILU0") );
92 
93  ScalarType const * A_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(A.handle());
94  unsigned int const * A_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle1());
95  unsigned int const * A_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle2());
96 
97  ScalarType * output_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(diagonal_block_A.handle());
98  unsigned int * output_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(diagonal_block_A.handle1());
99  unsigned int * output_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(diagonal_block_A.handle2());
100 
101  vcl_size_t output_counter = 0;
102  for (vcl_size_t row = start_index; row < stop_index; ++row)
103  {
104  unsigned int buffer_col_start = A_row_buffer[row];
105  unsigned int buffer_col_end = A_row_buffer[row+1];
106 
107  output_row_buffer[row - start_index] = static_cast<unsigned int>(output_counter);
108 
109  for (unsigned int buf_index = buffer_col_start; buf_index < buffer_col_end; ++buf_index)
110  {
111  unsigned int col = A_col_buffer[buf_index];
112  if (col < start_index)
113  continue;
114 
115  if (col >= static_cast<unsigned int>(stop_index))
116  continue;
117 
118  output_col_buffer[output_counter] = static_cast<unsigned int>(col - start_index);
119  output_elements[output_counter] = A_elements[buf_index];
120  ++output_counter;
121  }
122  output_row_buffer[row - start_index + 1] = static_cast<unsigned int>(output_counter);
123  }
124  }
125 
126 
127  }
128 
129  /** @brief A block ILU preconditioner class, can be supplied to solve()-routines
130   *
131   * @tparam MatrixType   Type of the system matrix
132   * @tparam ILUTag       Tag class identifying the ILU variant applied to each block (e.g. ilu0_tag, ilut_tag)
133   */
134  template <typename MatrixType, typename ILUTag>
135  class block_ilu_precond
136  {
137  typedef typename MatrixType::value_type ScalarType;
138 
139  public:
140  typedef std::vector<std::pair<vcl_size_t, vcl_size_t> > index_vector_type; //the pair refers to index range [a, b) of each block
141 
142 
143  block_ilu_precond(MatrixType const & mat,
144  ILUTag const & tag,
145  vcl_size_t num_blocks = 8
146  ) : tag_(tag), LU_blocks(num_blocks)
147  {
148 
149  // Set up vector of block indices:
150  block_indices_.resize(num_blocks);
151  for (vcl_size_t i=0; i<num_blocks; ++i)
152  {
153  vcl_size_t start_index = ( i * mat.size1()) / num_blocks;
154  vcl_size_t stop_index = ((i+1) * mat.size1()) / num_blocks;
155 
156  block_indices_[i] = std::pair<vcl_size_t, vcl_size_t>(start_index, stop_index);
157  }
158 
159  //initialize preconditioner:
160  //std::cout << "Start CPU precond" << std::endl;
161  init(mat);
162  //std::cout << "End CPU precond" << std::endl;
163  }
164 
165  block_ilu_precond(MatrixType const & mat,
166  ILUTag const & tag,
167  index_vector_type const & block_boundaries
168  ) : tag_(tag), block_indices_(block_boundaries), LU_blocks(block_boundaries.size())
169  {
170  //initialize preconditioner:
171  //std::cout << "Start CPU precond" << std::endl;
172  init(mat);
173  //std::cout << "End CPU precond" << std::endl;
174  }
175 
176 
177  template <typename VectorType>
178  void apply(VectorType & vec) const
179  {
180  for (vcl_size_t i=0; i<block_indices_.size(); ++i)
181  {
182  detail::ilu_vector_range<VectorType, ScalarType> vec_range(vec, block_indices_[i].first, LU_blocks[i].size2());
183 
184  unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(LU_blocks[i].handle1());
185  unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(LU_blocks[i].handle2());
186  ScalarType const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(LU_blocks[i].handle());
187 
188  viennacl::linalg::host_based::detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_range, LU_blocks[i].size2(), unit_lower_tag());
189  viennacl::linalg::host_based::detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_range, LU_blocks[i].size2(), upper_tag());
190 
191  }
192  }
193 
194  private:
195  void init(MatrixType const & A)
196  {
197  viennacl::context host_context(viennacl::MAIN_MEMORY);
198  viennacl::compressed_matrix<ScalarType> mat(host_context);
199 
200  viennacl::copy(A, mat);
201 
202  unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(mat.handle1());
203 
204 #ifdef VIENNACL_WITH_OPENMP
205  #pragma omp parallel for
206 #endif
207  for (long i=0; i<static_cast<long>(block_indices_.size()); ++i)
208  {
209  // Step 1: Extract blocks
210  vcl_size_t block_size = block_indices_[i].second - block_indices_[i].first;
211  vcl_size_t block_nnz = row_buffer[block_indices_[i].second] - row_buffer[block_indices_[i].first];
212  viennacl::compressed_matrix<ScalarType> mat_block(block_size, block_size, block_nnz, host_context);
213 
214  detail::extract_block_matrix(mat, mat_block, block_indices_[i].first, block_indices_[i].second);
215 
216  // Step 2: Precondition blocks:
217  viennacl::switch_memory_context(LU_blocks[i], host_context);
218  preconditioner_dispatch(mat_block, LU_blocks[i], tag_);
219  }
220 
221  }
222 
223  void preconditioner_dispatch(viennacl::compressed_matrix<ScalarType> const & mat_block,
224                               viennacl::compressed_matrix<ScalarType> & LU,
225                               viennacl::linalg::ilu0_tag)
226  {
227  LU = mat_block;
228  viennacl::linalg::precondition(LU, tag_);
229  }
230 
231  void preconditioner_dispatch(viennacl::compressed_matrix<ScalarType> const & mat_block,
232                               viennacl::compressed_matrix<ScalarType> & LU,
233                               viennacl::linalg::ilut_tag)
234  {
235  std::vector< std::map<unsigned int, ScalarType> > temp(mat_block.size1());
236 
237  viennacl::linalg::precondition(mat_block, temp, tag_);
238 
239  viennacl::copy(temp, LU);
240  }
241 
242  ILUTag const & tag_;
243  index_vector_type block_indices_;
244  std::vector< viennacl::compressed_matrix<ScalarType> > LU_blocks;
245  };
246 
247 
248 
249 
250 
251  /** @brief Block ILU preconditioner class, can be supplied to solve()-routines.
252   *
253   *  Specialization for compressed_matrix
254   */
255  template <typename ScalarType, unsigned int MAT_ALIGNMENT, typename ILUTag>
256  class block_ilu_precond< compressed_matrix<ScalarType, MAT_ALIGNMENT>, ILUTag >
257  {
258  typedef viennacl::compressed_matrix<ScalarType, MAT_ALIGNMENT> MatrixType;
259  //typedef std::vector<ScalarType> STLVectorType;
260 
261  public:
262  typedef std::vector<std::pair<vcl_size_t, vcl_size_t> > index_vector_type; //the pair refers to index range [a, b) of each block
263 
264 
265  block_ilu_precond(MatrixType const & mat,
266                    ILUTag const & tag,
267  vcl_size_t num_blocks = 8
268  ) : tag_(tag),
269  block_indices_(num_blocks),
270  gpu_block_indices(),
271  gpu_L_trans(0,0, viennacl::traits::context(mat)),
272  gpu_U_trans(0,0, viennacl::traits::context(mat)),
273  gpu_D(mat.size1(), viennacl::traits::context(mat)),
274  LU_blocks(num_blocks)
275  {
276  // Set up vector of block indices:
277  block_indices_.resize(num_blocks);
278  for (vcl_size_t i=0; i<num_blocks; ++i)
279  {
280  vcl_size_t start_index = ( i * mat.size1()) / num_blocks;
281  vcl_size_t stop_index = ((i+1) * mat.size1()) / num_blocks;
282 
283  block_indices_[i] = std::pair<vcl_size_t, vcl_size_t>(start_index, stop_index);
284  }
285 
286  //initialize preconditioner:
287  //std::cout << "Start CPU precond" << std::endl;
288  init(mat);
289  //std::cout << "End CPU precond" << std::endl;
290  }
291 
292  block_ilu_precond(MatrixType const & mat,
293                    ILUTag const & tag,
294  index_vector_type const & block_boundaries
295  ) : tag_(tag),
296  block_indices_(block_boundaries),
297  gpu_block_indices(),
298  gpu_L_trans(0,0,viennacl::traits::context(mat)),
299  gpu_U_trans(0,0,viennacl::traits::context(mat)),
300  gpu_D(mat.size1(),viennacl::traits::context(mat)),
301  LU_blocks(block_boundaries.size())
302  {
303  //initialize preconditioner:
304  //std::cout << "Start CPU precond" << std::endl;
305  init(mat);
306  //std::cout << "End CPU precond" << std::endl;
307  }
308 
309 
310  void apply(vector<ScalarType> & vec) const
311  {
312  viennacl::linalg::detail::block_inplace_solve(trans(gpu_L_trans), gpu_block_indices, block_indices_.size(), gpu_D,
313  vec,
314  viennacl::linalg::unit_lower_tag());
315 
316  viennacl::linalg::detail::block_inplace_solve(trans(gpu_U_trans), gpu_block_indices, block_indices_.size(), gpu_D,
317  vec,
318  viennacl::linalg::upper_tag());
319 
320  //apply_cpu(vec);
321  }
322 
323 
324  private:
325 
326  void init(MatrixType const & A)
327  {
328  viennacl::context host_context(viennacl::MAIN_MEMORY);
329  viennacl::compressed_matrix<ScalarType> mat(host_context);
330 
331  mat = A;
332 
333  unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(mat.handle1());
334 
335 #ifdef VIENNACL_WITH_OPENMP
336  #pragma omp parallel for
337 #endif
338  for (long i=0; i<static_cast<long>(block_indices_.size()); ++i)
339  {
340  // Step 1: Extract blocks
341  vcl_size_t block_size = block_indices_[i].second - block_indices_[i].first;
342  vcl_size_t block_nnz = row_buffer[block_indices_[i].second] - row_buffer[block_indices_[i].first];
343  viennacl::compressed_matrix<ScalarType> mat_block(block_size, block_size, block_nnz, host_context);
344 
345  detail::extract_block_matrix(mat, mat_block, block_indices_[i].first, block_indices_[i].second);
346 
347  // Step 2: Precondition blocks:
348  viennacl::switch_memory_context(LU_blocks[i], host_context);
349  preconditioner_dispatch(mat_block, LU_blocks[i], tag_);
350  }
351 
352  /*
353  * copy resulting preconditioner back to GPU:
354  */
355 
359 
360  viennacl::backend::typesafe_host_array<unsigned int> block_indices_uint(gpu_block_indices, 2 * block_indices_.size());
361  for (vcl_size_t i=0; i<block_indices_.size(); ++i)
362  {
363  block_indices_uint.set(2*i, block_indices_[i].first);
364  block_indices_uint.set(2*i + 1, block_indices_[i].second);
365  }
366 
367  viennacl::backend::memory_create(gpu_block_indices, block_indices_uint.raw_size(), viennacl::traits::context(A), block_indices_uint.get());
368 
369  blocks_to_device(mat.size1());
370 
371  }
372 
373  // Copy computed preconditioned blocks to OpenCL device
374  void blocks_to_device(vcl_size_t matrix_size)
375  {
376  std::vector< std::map<unsigned int, ScalarType> > L_transposed(matrix_size);
377  std::vector< std::map<unsigned int, ScalarType> > U_transposed(matrix_size);
378  std::vector<ScalarType> entries_D(matrix_size);
379 
380  //
381  // Transpose individual blocks into a single large matrix:
382  //
383  for (vcl_size_t block_index = 0; block_index < LU_blocks.size(); ++block_index)
384  {
385  MatrixType const & current_block = LU_blocks[block_index];
386 
387  unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(current_block.handle1());
388  unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(current_block.handle2());
389  ScalarType const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(current_block.handle());
390 
391  vcl_size_t block_start = block_indices_[block_index].first;
392 
393  //transpose L and U:
394  for (vcl_size_t row = 0; row < current_block.size1(); ++row)
395  {
396  unsigned int buffer_col_start = row_buffer[row];
397  unsigned int buffer_col_end = row_buffer[row+1];
398 
399  for (unsigned int buf_index = buffer_col_start; buf_index < buffer_col_end; ++buf_index)
400  {
401  unsigned int col = col_buffer[buf_index];
402 
403  if (row > col) //entry for L
404  L_transposed[col + block_start][static_cast<unsigned int>(row + block_start)] = elements[buf_index];
405  else if (row == col)
406  entries_D[row + block_start] = elements[buf_index];
407  else //entry for U
408  U_transposed[col + block_start][static_cast<unsigned int>(row + block_start)] = elements[buf_index];
409  }
410  }
411  }
412 
413  //
414  // Move data to GPU:
415  //
416  tools::const_sparse_matrix_adapter<ScalarType, unsigned int> adapted_L_transposed(L_transposed, matrix_size, matrix_size);
417  tools::const_sparse_matrix_adapter<ScalarType, unsigned int> adapted_U_transposed(U_transposed, matrix_size, matrix_size);
418  viennacl::copy(adapted_L_transposed, gpu_L_trans);
419  viennacl::copy(adapted_U_transposed, gpu_U_trans);
420  viennacl::copy(entries_D, gpu_D);
421  }
422 
423  void preconditioner_dispatch(viennacl::compressed_matrix<ScalarType> const & mat_block,
424                               viennacl::compressed_matrix<ScalarType> & LU,
425                               viennacl::linalg::ilu0_tag)
426  {
427  LU = mat_block;
428  viennacl::linalg::precondition(LU, tag_);
429  }
430 
431  void preconditioner_dispatch(viennacl::compressed_matrix<ScalarType> const & mat_block,
432                               viennacl::compressed_matrix<ScalarType> & LU,
433                               viennacl::linalg::ilut_tag)
434  {
435  std::vector< std::map<unsigned int, ScalarType> > temp(mat_block.size1());
436 
437  viennacl::linalg::precondition(mat_block, temp, tag_);
438 
439  viennacl::copy(temp, LU);
440  }
441 
442 
443  ILUTag const & tag_;
444  index_vector_type block_indices_;
445  viennacl::backend::mem_handle gpu_block_indices;
446  viennacl::compressed_matrix<ScalarType> gpu_L_trans;
447  viennacl::compressed_matrix<ScalarType> gpu_U_trans;
448  viennacl::vector<ScalarType> gpu_D;
449 
450  std::vector< MatrixType > LU_blocks;
451  };
452 
453 
454  }
455 }
456 
457 
458 
459 
460 #endif
461 
462 
463 
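Usage sketch (not part of block_ilu.hpp): the block ILU preconditioner defined above is constructed from a system matrix plus an ILU tag and then handed to one of ViennaCL's iterative solvers. The snippet below is a minimal illustration only; solve_with_block_ilu() and the choice of two custom blocks are hypothetical, while block_ilu_precond, ilu0_tag, bicgstab_tag and solve() are the library facilities used in this header and the rest of ViennaCL. It assumes that including viennacl/linalg/ilu.hpp pulls in this file.

#include <cstddef>
#include <utility>

#include "viennacl/compressed_matrix.hpp"
#include "viennacl/vector.hpp"
#include "viennacl/linalg/ilu.hpp"       // convenience header for the ILU preconditioners
#include "viennacl/linalg/bicgstab.hpp"

// Hypothetical helper: A and rhs are assumed to be assembled by the caller.
viennacl::vector<double> solve_with_block_ilu(viennacl::compressed_matrix<double> const & A,
                                              viennacl::vector<double> const & rhs)
{
  typedef viennacl::compressed_matrix<double>                                            SparseMatrix;
  typedef viennacl::linalg::block_ilu_precond<SparseMatrix, viennacl::linalg::ilu0_tag>  BlockILU0;

  // Default partitioning: 8 diagonal blocks of roughly equal size
  BlockILU0 block_ilu0(A, viennacl::linalg::ilu0_tag());

  // Alternative: user-defined block boundaries, here two blocks [0, n/2) and [n/2, n)
  BlockILU0::index_vector_type blocks;
  blocks.push_back(std::make_pair(std::size_t(0), A.size1() / 2));
  blocks.push_back(std::make_pair(A.size1() / 2,  A.size1()));
  BlockILU0 block_ilu0_custom(A, viennacl::linalg::ilu0_tag(), blocks);

  // Hand the preconditioner to a Krylov solver:
  return viennacl::linalg::solve(A, rhs, viennacl::linalg::bicgstab_tag(), block_ilu0);
}

Replacing ilu0_tag by viennacl::linalg::ilut_tag selects the threshold-based factorization handled by the preconditioner_dispatch() overloads above.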