ViennaCL - The Vienna Computing Library  1.5.2
fspai.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_DETAIL_SPAI_FSPAI_HPP
2 #define VIENNACL_LINALG_DETAIL_SPAI_FSPAI_HPP
3 
4 /* =========================================================================
5  Copyright (c) 2010-2014, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the PDF manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
21 #include <utility>
22 #include <iostream>
23 #include <fstream>
24 #include <string>
25 #include <algorithm>
26 #include <vector>
27 #include <math.h>
28 #include <map>
29 
30 //boost includes
31 #include "boost/numeric/ublas/vector.hpp"
32 #include "boost/numeric/ublas/matrix.hpp"
33 #include "boost/numeric/ublas/matrix_proxy.hpp"
34 #include "boost/numeric/ublas/vector_proxy.hpp"
35 #include "boost/numeric/ublas/storage.hpp"
36 #include "boost/numeric/ublas/io.hpp"
37 #include "boost/numeric/ublas/lu.hpp"
38 #include "boost/numeric/ublas/triangular.hpp"
39 #include "boost/numeric/ublas/matrix_expression.hpp"
40 
41 // ViennaCL includes
42 #include "viennacl/linalg/prod.hpp"
43 #include "viennacl/matrix.hpp"
47 #include "viennacl/scalar.hpp"
48 #include "viennacl/linalg/cg.hpp"
50 #include "viennacl/linalg/ilu.hpp"
51 //#include <omp.h>
52 
57 namespace viennacl
58 {
59  namespace linalg
60  {
61  namespace detail
62  {
63  namespace spai
64  {
65 
70  class fspai_tag{
77  public:
79  double residual_norm_threshold = 1e-3,
80  unsigned int iteration_limit = 5,
81  bool is_static = false,
82  bool is_right = false) :
83  residual_norm_threshold_(residual_norm_threshold),
84  iteration_limit_(iteration_limit),
85  is_static_(is_static),
86  is_right_(is_right) {}
87 
88  inline double getResidualNormThreshold() const
89  { return residual_norm_threshold_; }
90  inline unsigned long getIterationLimit () const
91  { return iteration_limit_; }
92  inline bool getIsStatic() const
93  { return is_static_; }
94  inline bool getIsRight() const
95  { return is_right_; }
96  inline void setResidualNormThreshold(double residual_norm_threshold){
97  if(residual_norm_threshold > 0)
98  residual_norm_threshold_ = residual_norm_threshold;
99  }
100  inline void setIterationLimit(unsigned long iteration_limit){
101  if(iteration_limit > 0)
102  iteration_limit_ = iteration_limit;
103  }
104  inline void setIsRight(bool is_right){
105  is_right_ = is_right;
106  }
107  inline void setIsStatic(bool is_static){
108  is_static_ = is_static;
109  }
110 
111  private:
112  double residual_norm_threshold_;
113  unsigned long iteration_limit_;
114  bool is_static_;
115  bool is_right_;
116  };
117 
118 
119  //
120  // Helper: Store A in an STL container of type, exploiting symmetry
121  // Reason: ublas interface does not allow to iterate over nonzeros of a particular row without starting an iterator1 from the very beginning of the matrix...
122  //
123  template <typename MatrixType, typename ScalarType>
124  void sym_sparse_matrix_to_stl(MatrixType const & A, std::vector<std::map<unsigned int, ScalarType> > & STL_A)
125  {
126  STL_A.resize(A.size1());
127  for (typename MatrixType::const_iterator1 row_it = A.begin1();
128  row_it != A.end1();
129  ++row_it)
130  {
131  for (typename MatrixType::const_iterator2 col_it = row_it.begin();
132  col_it != row_it.end();
133  ++col_it)
134  {
135  if (col_it.index1() >= col_it.index2())
136  STL_A[col_it.index1()][static_cast<unsigned int>(col_it.index2())] = *col_it;
137  else
138  break; //go to next row
139  }
140  }
141  }
142 
143 
144  //
145  // Generate index sets J_k, k=0,...,N-1
146  //
147  template <typename MatrixType>
148  void generateJ(MatrixType const & A, std::vector<std::vector<vcl_size_t> > & J)
149  {
150  for (typename MatrixType::const_iterator1 row_it = A.begin1();
151  row_it != A.end1();
152  ++row_it)
153  {
154  for (typename MatrixType::const_iterator2 col_it = row_it.begin();
155  col_it != row_it.end();
156  ++col_it)
157  {
158  if (col_it.index1() > col_it.index2()) //Matrix is symmetric, thus only work on lower triangular part
159  {
160  J[col_it.index2()].push_back(col_it.index1());
161  J[col_it.index1()].push_back(col_it.index2());
162  }
163  else
164  break; //go to next row
165  }
166  }
167  }
168 
169 
170  //
171  // Extracts the blocks A(\tilde{J}_k, \tilde{J}_k) from A
172  // Sets up y_k = A(\tilde{J}_k, k) for the inplace-solution after Cholesky-factoriation
173  //
174  template <typename ScalarType, typename MatrixType, typename VectorType>
175  void fill_blocks(std::vector< std::map<unsigned int, ScalarType> > & A,
176  std::vector<MatrixType> & blocks,
177  std::vector<std::vector<vcl_size_t> > const & J,
178  std::vector<VectorType> & Y)
179  {
180  for (vcl_size_t k=0; k<A.size(); ++k)
181  {
182  std::vector<vcl_size_t> const & Jk = J[k];
183  VectorType & yk = Y[k];
184  MatrixType & block_k = blocks[k];
185 
186  yk.resize(Jk.size());
187  block_k.resize(Jk.size(), Jk.size());
188  block_k.clear();
189 
190  for (vcl_size_t i=0; i<Jk.size(); ++i)
191  {
192  vcl_size_t row_index = Jk[i];
193  std::map<unsigned int, ScalarType> & A_row = A[row_index];
194 
195  //fill y_k:
196  yk[i] = A_row[static_cast<unsigned int>(k)];
197 
198  for (vcl_size_t j=0; j<Jk.size(); ++j)
199  {
200  vcl_size_t col_index = Jk[j];
201  if (col_index <= row_index && A_row.find(static_cast<unsigned int>(col_index)) != A_row.end()) //block is symmetric, thus store only lower triangular part
202  block_k(i, j) = A_row[static_cast<unsigned int>(col_index)];
203  }
204  }
205  }
206  }
207 
208 
209  //
210  // Perform Cholesky factorization of A inplace. Cf. Schwarz: Numerische Mathematik, vol 5, p. 58
211  //
212  template <typename MatrixType>
213  void cholesky_decompose(MatrixType & A)
214  {
215  for (vcl_size_t k=0; k<A.size2(); ++k)
216  {
217  if (A(k,k) <= 0)
218  {
219  std::cout << "k: " << k << std::endl;
220  std::cout << "A(k,k): " << A(k,k) << std::endl;
221  }
222 
223  assert(A(k,k) > 0);
224 
225  A(k,k) = std::sqrt(A(k,k));
226 
227  for (vcl_size_t i=k+1; i<A.size1(); ++i)
228  {
229  A(i,k) /= A(k,k);
230  for (vcl_size_t j=k+1; j<=i; ++j)
231  A(i,j) -= A(i,k) * A(j,k);
232  }
233  }
234  }
235 
236 
237  //
238  // Compute x in Ax = b, where A is already Cholesky factored (A = L L^T)
239  //
240  template <typename MatrixType, typename VectorType>
241  void cholesky_solve(MatrixType const & L, VectorType & b)
242  {
243  // inplace forward solve L x = b
244  for (vcl_size_t i=0; i<L.size1(); ++i)
245  {
246  for (vcl_size_t j=0; j<i; ++j)
247  b[i] -= L(i,j) * b[j];
248  b[i] /= L(i,i);
249  }
250 
251  // inplace backward solve L^T x = b:
252  for (vcl_size_t i=L.size1()-1; ; --i)
253  {
254  for (vcl_size_t k=i+1; k<L.size1(); ++k)
255  b[i] -= L(k,i) * b[k];
256  b[i] /= L(i,i);
257 
258  if (i==0) //vcl_size_t might be unsigned, therefore manual check for equality with zero here
259  break;
260  }
261  }
262 
263 
264 
265  //
266  // Compute the Cholesky factor L from the sparse vectors y_k
267  //
268  template <typename MatrixType, typename VectorType1>
269  void computeL(MatrixType const & A,
270  MatrixType & L,
271  MatrixType & L_trans,
272  std::vector<VectorType1> & Y,
273  std::vector<std::vector<vcl_size_t> > & J)
274  {
275  typedef typename VectorType1::value_type ScalarType;
276  typedef std::vector<std::map<unsigned int, ScalarType> > STLSparseMatrixType;
277 
278  STLSparseMatrixType L_temp(A.size1());
279 
280  for (vcl_size_t k=0; k<A.size1(); ++k)
281  {
282  std::vector<vcl_size_t> const & Jk = J[k];
283  VectorType1 const & yk = Y[k];
284 
285  //compute L(k,k):
286  ScalarType Lkk = A(k,k);
287  for (vcl_size_t i=0; i<Jk.size(); ++i)
288  Lkk -= A(Jk[i],k) * yk[i];
289 
290  Lkk = ScalarType(1) / std::sqrt(Lkk);
291  L_temp[k][static_cast<unsigned int>(k)] = Lkk;
292  L_trans(k,k) = Lkk;
293 
294  //write lower diagonal entries:
295  for (vcl_size_t i=0; i<Jk.size(); ++i)
296  {
297  L_temp[Jk[i]][static_cast<unsigned int>(k)] = -Lkk * yk[i];
298  L_trans(k, Jk[i]) = -Lkk * yk[i];
299  }
300  } //for k
301 
302 
303  //build L from L_temp
304  for (vcl_size_t i=0; i<L_temp.size(); ++i)
305  for (typename std::map<unsigned int, ScalarType>::const_iterator it = L_temp[i].begin();
306  it != L_temp[i].end();
307  ++it)
308  L(i, it->first) = it->second;
309  }
310 
311 
312  //
313  // Top level FSPAI function
314  //
315  template <typename MatrixType>
316  void computeFSPAI(MatrixType const & A,
317  MatrixType const & PatternA,
318  MatrixType & L,
319  MatrixType & L_trans,
320  fspai_tag)
321  {
322  typedef typename MatrixType::value_type ScalarType;
323  typedef boost::numeric::ublas::matrix<ScalarType> DenseMatrixType;
324  typedef std::vector<std::map<unsigned int, ScalarType> > SparseMatrixType;
325 
326  //
327  // preprocessing: Store A in a STL container:
328  //
329  //std::cout << "Transferring to STL container:" << std::endl;
330  std::vector<std::vector<ScalarType> > y_k(A.size1());
331  SparseMatrixType STL_A(A.size1());
332  sym_sparse_matrix_to_stl(A, STL_A);
333 
334 
335  //
336  // Step 1: Generate pattern indices
337  //
338  //std::cout << "computeFSPAI(): Generating pattern..." << std::endl;
339  std::vector<std::vector<vcl_size_t> > J(A.size1());
340  generateJ(PatternA, J);
341 
342  //
343  // Step 2: Set up matrix blocks
344  //
345  //std::cout << "computeFSPAI(): Setting up matrix blocks..." << std::endl;
346  std::vector<DenseMatrixType> subblocks_A(A.size1());
347  fill_blocks(STL_A, subblocks_A, J, y_k);
348  STL_A.clear(); //not needed anymore
349 
350  //
351  // Step 3: Cholesky-factor blocks
352  //
353  //std::cout << "computeFSPAI(): Cholesky-factorization..." << std::endl;
354  for (vcl_size_t i=0; i<subblocks_A.size(); ++i)
355  {
356  //std::cout << "Block before: " << subblocks_A[i] << std::endl;
357  cholesky_decompose(subblocks_A[i]);
358  //std::cout << "Block after: " << subblocks_A[i] << std::endl;
359  }
360 
361 
362  /*vcl_size_t num_bytes = 0;
363  for (vcl_size_t i=0; i<subblocks_A.size(); ++i)
364  num_bytes += 8*subblocks_A[i].size1()*subblocks_A[i].size2();*/
365  //std::cout << "Memory for FSPAI matrix: " << num_bytes / (1024.0 * 1024.0) << " MB" << std::endl;
366 
367  //
368  // Step 4: Solve for y_k
369  //
370  //std::cout << "computeFSPAI(): Cholesky-solve..." << std::endl;
371  for (vcl_size_t i=0; i<y_k.size(); ++i)
372  {
373  if (subblocks_A[i].size1() > 0) //block might be empty...
374  {
375  //y_k[i].resize(subblocks_A[i].size1());
376  //std::cout << "y_k[" << i << "]: ";
377  //for (vcl_size_t j=0; j<y_k[i].size(); ++j)
378  // std::cout << y_k[i][j] << " ";
379  //std::cout << std::endl;
380  cholesky_solve(subblocks_A[i], y_k[i]);
381  }
382  }
383 
384 
385  //
386  // Step 5: Set up Cholesky factors L and L_trans
387  //
388  //std::cout << "computeFSPAI(): Computing L..." << std::endl;
389  L.resize(A.size1(), A.size2(), false);
390  L.reserve(A.nnz(), false);
391  L_trans.resize(A.size1(), A.size2(), false);
392  L_trans.reserve(A.nnz(), false);
393  computeL(A, L, L_trans, y_k, J);
394 
395  //std::cout << "L: " << L << std::endl;
396  }
397 
398 
399 
400  }
401  }
402  }
403 }
404 
405 #endif
void setIsRight(bool is_right)
Definition: fspai.hpp:104
unsigned long getIterationLimit() const
Definition: fspai.hpp:90
std::size_t vcl_size_t
Definition: forwards.h:58
Implementations of dense matrix related operations including matrix-vector products.
fspai_tag(double residual_norm_threshold=1e-3, unsigned int iteration_limit=5, bool is_static=false, bool is_right=false)
Constructor.
Definition: fspai.hpp:78
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
Implementation of the dense matrix class.
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
Definition: size.hpp:216
void setResidualNormThreshold(double residual_norm_threshold)
Definition: fspai.hpp:96
void generateJ(MatrixType const &A, std::vector< std::vector< vcl_size_t > > &J)
Definition: fspai.hpp:148
Generic interface for the computation of inner products. See viennacl/linalg/vector_operations.hpp for implementations.
void sym_sparse_matrix_to_stl(MatrixType const &A, std::vector< std::map< unsigned int, ScalarType > > &STL_A)
Definition: fspai.hpp:124
void computeFSPAI(MatrixType const &A, MatrixType const &PatternA, MatrixType &L, MatrixType &L_trans, fspai_tag)
Definition: fspai.hpp:316
A tag for FSPAI. Experimental. Contains values for the algorithm. Must be passed to spai_precond cons...
Definition: fspai.hpp:70
double getResidualNormThreshold() const
Definition: fspai.hpp:88
Implementations of incomplete factorization preconditioners. Convenience header file.
Implementation of the compressed_matrix class.
bool getIsRight() const
Definition: fspai.hpp:94
Implementations of operations using sparse matrices.
void cholesky_decompose(MatrixType &A)
Definition: fspai.hpp:213
void cholesky_solve(MatrixType const &L, VectorType &b)
Definition: fspai.hpp:241
The conjugate gradient method is implemented here.
void computeL(MatrixType const &A, MatrixType &L, MatrixType &L_trans, std::vector< VectorType1 > &Y, std::vector< std::vector< vcl_size_t > > &J)
Definition: fspai.hpp:269
void fill_blocks(std::vector< std::map< unsigned int, ScalarType > > &A, std::vector< MatrixType > &blocks, std::vector< std::vector< vcl_size_t > > const &J, std::vector< VectorType > &Y)
Definition: fspai.hpp:175
void setIterationLimit(unsigned long iteration_limit)
Definition: fspai.hpp:100
bool getIsStatic() const
Definition: fspai.hpp:92
void setIsStatic(bool is_static)
Definition: fspai.hpp:107
Implementation of the ViennaCL scalar class.