1 #ifndef VIENNACL_LINALG_DETAIL_SPAI_QR_HPP
2 #define VIENNACL_LINALG_DETAIL_SPAI_QR_HPP
37 #include "boost/numeric/ublas/vector.hpp"
38 #include "boost/numeric/ublas/matrix.hpp"
39 #include "boost/numeric/ublas/matrix_proxy.hpp"
40 #include "boost/numeric/ublas/storage.hpp"
41 #include "boost/numeric/ublas/io.hpp"
42 #include "boost/numeric/ublas/matrix_expression.hpp"
43 #include "boost/numeric/ublas/detail/matrix_assign.hpp"
62 template<
typename T,
typename InputIterator>
63 void Print(std::ostream& ostr, InputIterator it_begin, InputIterator it_end){
65 std::string delimiters =
" ";
66 std::copy(it_begin, it_end, std::ostream_iterator<T>(ostr, delimiters.c_str()));
70 template<
typename VectorType,
typename MatrixType>
71 void write_to_block(VectorType& con_A_I_J,
unsigned int start_ind,
const std::vector<unsigned int>& I,
const std::vector<unsigned int>& J, MatrixType& m){
72 m.resize(I.size(), J.size(),
false);
75 m(j,i) = con_A_I_J[start_ind + i*I.size() + j];
80 template<
typename VectorType>
82 const std::vector<std::vector<unsigned int> >& g_I,
const std::vector<std::vector<unsigned int> >& g_J){
83 typedef typename VectorType::value_type ScalarType;
84 std::vector<boost::numeric::ublas::matrix<ScalarType> > com_A_I_J(g_I.size());
86 write_to_block( con_A_I_J, blocks_ind[i], g_I[i], g_J[i], com_A_I_J[i]);
87 std::cout<<com_A_I_J[i]<<std::endl;
90 template<
typename VectorType>
91 void print_continious_vector(VectorType& con_v, std::vector<cl_uint>& block_ind,
const std::vector<std::vector<unsigned int> >& g_J){
92 typedef typename VectorType::value_type ScalarType;
93 std::vector<boost::numeric::ublas::vector<ScalarType> > com_v(g_J.size());
96 com_v[i].resize(g_J[i].
size());
98 com_v[i](j) = con_v[block_ind[i] + j];
100 std::cout<<com_v[i]<<std::endl;
112 inline void compute_blocks_size(
const std::vector<std::vector<unsigned int> >& g_I,
const std::vector<std::vector<unsigned int> >& g_J,
113 unsigned int& sz, std::vector<cl_uint>& blocks_ind, std::vector<cl_uint>& matrix_dims)
117 sz +=
static_cast<unsigned int>(g_I[i].size()*g_J[i].size());
118 matrix_dims[2*i] =
static_cast<cl_uint
>(g_I[i].size());
119 matrix_dims[2*i + 1] =
static_cast<cl_uint
>(g_J[i].size());
120 blocks_ind[i+1] = blocks_ind[i] +
static_cast<cl_uint
>(g_I[i].size()*g_J[i].size());
128 template <
typename SizeType>
129 void get_size(
const std::vector<std::vector<SizeType> >& inds, SizeType &
size){
131 for (
vcl_size_t i = 0; i < inds.size(); ++i) {
132 size +=
static_cast<unsigned int>(inds[i].size());
140 template <
typename SizeType>
141 void init_start_inds(
const std::vector<std::vector<SizeType> >& inds, std::vector<cl_uint>& start_inds){
143 start_inds[i+1] = start_inds[i] +
static_cast<cl_uint
>(inds[i].size());
153 template<
typename MatrixType,
typename ScalarType>
154 void dot_prod(
const MatrixType& A,
unsigned int beg_ind, ScalarType& res){
155 res =
static_cast<ScalarType
>(0);
156 for(
vcl_size_t i = beg_ind; i < A.size1(); ++i){
157 res += A(i, beg_ind-1)*A(i, beg_ind-1);
167 template<
typename MatrixType,
typename VectorType,
typename ScalarType>
168 void custom_inner_prod(
const MatrixType& A,
const VectorType& v,
unsigned int col_ind,
unsigned int start_ind, ScalarType& res){
169 res =
static_cast<ScalarType
>(0);
170 for(
unsigned int i = start_ind; i < static_cast<unsigned int>(A.size1()); ++i){
171 res += A(i, col_ind)*v(i);
180 template<
typename MatrixType,
typename VectorType>
181 void copy_vector(
const MatrixType & A, VectorType & v,
const unsigned int beg_ind){
182 for(
unsigned int i = beg_ind; i < static_cast<unsigned int>(A.size1()); ++i){
183 v(i) = A( i, beg_ind-1);
194 template<
typename MatrixType,
typename VectorType,
typename ScalarType>
202 v(j) =
static_cast<ScalarType
>(1.0);
207 mu = std::sqrt(A(j,j)*A(j, j) + sg);
211 v(j) = -sg/(A(j, j) + mu);
213 b = 2*(v(j)*v(j))/(sg + v(j)*v(j));
223 template<
typename MatrixType,
typename VectorType,
typename ScalarType>
227 ScalarType in_prod_res;
228 for(
unsigned int i = iter_cnt; i < static_cast<unsigned int>(A.size2()); ++i){
231 for(
unsigned int j = iter_cnt; j < static_cast<unsigned int>(A.size1()); ++j){
232 A(j, i) -= b*in_prod_res*v(j);
242 template<
typename MatrixType,
typename VectorType>
245 for(
unsigned int i = ind; i < static_cast<unsigned int>(A.size1()); ++i){
256 template<
typename MatrixType,
typename VectorType>
259 typedef typename MatrixType::value_type ScalarType;
260 if((R.size1() > 0) && (R.size2() > 0)){
261 VectorType v = (VectorType)boost::numeric::ublas::zero_vector<ScalarType>(R.size1());
262 b_v = (VectorType)boost::numeric::ublas::zero_vector<ScalarType>(R.size2());
263 for(
unsigned int i = 0; i < static_cast<unsigned int>(R.size2()); ++i){
294 template <
typename SizeType>
299 if(inds[i].
size() > max_size){
300 max_size =
static_cast<SizeType
>(inds[i].size());
311 template<
typename MatrixType,
typename VectorType,
typename ScalarType>
312 void custom_dot_prod(
const MatrixType& A,
const VectorType& v,
unsigned int ind, ScalarType& res)
314 res =
static_cast<ScalarType
>(0);
315 for(
unsigned int j = ind; j < A.size1(); ++j){
319 res += A(j, ind)*v(j);
329 template<
typename MatrixType,
typename VectorType>
332 typedef typename MatrixType::value_type ScalarType;
333 ScalarType inn_prod =
static_cast<ScalarType
>(0);
338 y(j) -= b_v(i)*inn_prod;
341 y(j) -= b_v(i)*inn_prod*R(j,i);
352 template<
typename MatrixType,
typename VectorType>
357 tmp_v = (VectorType)
column(A,i);
373 template<
typename ScalarType>
374 void block_qr(std::vector<std::vector<unsigned int> >& g_I,
375 std::vector<std::vector<unsigned int> >& g_J,
378 std::vector<cl_uint>& g_is_update,
384 unsigned int bv_size = 0;
385 unsigned int v_size = 0;
388 unsigned int local_r_n = 0;
389 unsigned int local_c_n = 0;
397 std::vector<cl_uint> start_bv_inds(g_I.size() + 1, 0);
398 std::vector<cl_uint> start_v_inds(g_I.size() + 1, 0);
402 std::vector<ScalarType> b_v(bv_size, static_cast<ScalarType>(0));
403 std::vector<ScalarType> v(v_size, static_cast<ScalarType>(0));
408 static_cast<unsigned int>(
sizeof(ScalarType)*bv_size),
412 static_cast<unsigned int>(
sizeof(ScalarType)*v_size),
416 static_cast<unsigned int>(
sizeof(cl_uint)*g_I.size()),
417 &(start_bv_inds[0]));
420 static_cast<unsigned int>(
sizeof(cl_uint)*g_I.size()),
423 static_cast<unsigned int>(
sizeof(cl_uint)*g_is_update.size()),
435 static_cast<cl_uint
>(g_I.size())));
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context.
Definition: context.hpp:470
Main kernel class for generating OpenCL kernels for the sparse approximate inverse preconditioners...
Definition: spai.hpp:570
void apply_q_trans_mat(const MatrixType &R, const VectorType &b_v, MatrixType &A)
Multiplication of Q'*A, where Q is in implicit for lower part of R and vector of betas - b_v...
Definition: qr.hpp:353
viennacl::ocl::handle< cl_mem > & handle1()
Return handle to start indices.
Definition: block_vector.hpp:60
std::size_t vcl_size_t
Definition: forwards.h:58
Represents a contigious matrices on GPU.
Definition: block_matrix.hpp:49
viennacl::ocl::handle< cl_mem > & handle()
Returns a handle to the elements.
Definition: block_matrix.hpp:56
Represents an OpenCL kernel within ViennaCL.
Definition: kernel.hpp:59
Implementation of the dense matrix class.
OpenCL kernel file for sparse approximate inverse operations.
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:51
viennacl::ocl::handle< cl_mem > & handle2()
Returns a handle to the start indices of matrix.
Definition: block_matrix.hpp:64
viennacl::ocl::handle< cl_mem > & handle()
Return handle to the elements.
Definition: block_vector.hpp:56
void apply_householder_reflection(MatrixType &A, unsigned int iter_cnt, VectorType &v, ScalarType b)
Inplace application of Householder vector to a matrix A.
Definition: qr.hpp:224
void custom_dot_prod(const MatrixType &A, const VectorType &v, unsigned int ind, ScalarType &res)
Dot_prod(column(A, ind), v) starting from index ind+1.
Definition: qr.hpp:312
void write_to_block(VectorType &con_A_I_J, unsigned int start_ind, const std::vector< unsigned int > &I, const std::vector< unsigned int > &J, MatrixType &m)
Definition: qr.hpp:71
void get_max_block_size(const std::vector< std::vector< SizeType > > &inds, SizeType &max_size)
Getting max size of rows/columns from container of index set.
Definition: qr.hpp:295
void store_householder_vector(MatrixType &A, unsigned int ind, VectorType &v)
Storage of vector v in column(A, ind), starting from ind-1 index of a column.
Definition: qr.hpp:243
Represents a generic 'context' similar to an OpenCL context, but is backend-agnostic and thus also su...
Definition: context.hpp:39
Implementation of a bunch of (small) matrices on GPU. Experimental.
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:144
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Definition: enqueue.hpp:48
A class representing local (shared) OpenCL memory. Typically used as kernel argument.
Definition: local_mem.hpp:33
viennacl::ocl::handle< cl_mem > create_memory(cl_mem_flags flags, unsigned int size, void *ptr=NULL) const
Creates a memory buffer within the context.
Definition: context.hpp:199
Implementation of a bunch of vectors on GPU. Experimental.
void copy(std::vector< SCALARTYPE > &cpu_vec, circulant_matrix< SCALARTYPE, ALIGNMENT > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
Definition: circulant_matrix.hpp:150
void get_size(const std::vector< std::vector< SizeType > > &inds, SizeType &size)
Computes size of particular container of index set.
Definition: qr.hpp:129
void init_start_inds(const std::vector< std::vector< SizeType > > &inds, std::vector< cl_uint > &start_inds)
Initializes start indices of particular index set.
Definition: qr.hpp:141
void apply_q_trans_vec(const MatrixType &R, const VectorType &b_v, VectorType &y)
Recovery Q from matrix R and vector of betas b_v.
Definition: qr.hpp:330
void print_continious_vector(VectorType &con_v, std::vector< cl_uint > &block_ind, const std::vector< std::vector< unsigned int > > &g_J)
Definition: qr.hpp:91
void Print(std::ostream &ostr, InputIterator it_begin, InputIterator it_end)
Definition: qr.hpp:63
void compute_blocks_size(const std::vector< std::vector< unsigned int > > &g_I, const std::vector< std::vector< unsigned int > > &g_J, unsigned int &sz, std::vector< cl_uint > &blocks_ind, std::vector< cl_uint > &matrix_dims)
**************************************** BLOCK FUNCTIONS ************************************// ...
Definition: qr.hpp:112
Implementations of the OpenCL backend, where all contexts are stored in.
void dot_prod(const MatrixType &A, unsigned int beg_ind, ScalarType &res)
Dot prod of particular column of martix A with it's self starting at a certain index beg_ind...
Definition: qr.hpp:154
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
Represents a contigious vector on GPU.
Definition: block_vector.hpp:49
void single_qr(MatrixType &R, VectorType &b_v)
Inplace QR factorization via Householder reflections c.f. Gene H. Golub, Charles F. Van Loan "Matrix Computations" 3rd edition p.224.
Definition: qr.hpp:257
void block_qr(std::vector< std::vector< unsigned int > > &g_I, std::vector< std::vector< unsigned int > > &g_J, block_matrix &g_A_I_J_vcl, block_vector &g_bv_vcl, std::vector< cl_uint > &g_is_update, viennacl::context ctx)
Inplace QR factorization via Householder reflections c.f. Gene H. Golub, Charles F. Van Loan "Matrix Computations" 3rd edition p.224 performed on GPU.
Definition: qr.hpp:374
void custom_inner_prod(const MatrixType &A, const VectorType &v, unsigned int col_ind, unsigned int start_ind, ScalarType &res)
Dot prod of particular matrix column with arbitrary vector: A(:, col_ind)
Definition: qr.hpp:168
size_type global_work_size(int index=0) const
Returns the global work size at the respective dimension.
Definition: kernel.hpp:759
vector_expression< const matrix_base< NumericT, F >, const unsigned int, op_column > column(const matrix_base< NumericT, F > &A, unsigned int j)
Definition: matrix.hpp:918
static void init(viennacl::ocl::context &ctx)
Definition: spai.hpp:577
void copy_vector(const MatrixType &A, VectorType &v, const unsigned int beg_ind)
Copying part of matrix column.
Definition: qr.hpp:181
viennacl::ocl::handle< cl_mem > & handle1()
Returns a handle to the matrix dimensions.
Definition: block_matrix.hpp:60
size_type local_work_size(int index=0) const
Returns the local work size at the respective dimension.
Definition: kernel.hpp:750
void print_continious_matrix(VectorType &con_A_I_J, std::vector< cl_uint > &blocks_ind, const std::vector< std::vector< unsigned int > > &g_I, const std::vector< std::vector< unsigned int > > &g_J)
Definition: qr.hpp:81
void householder_vector(const MatrixType &A, unsigned int j, VectorType &v, ScalarType &b)
Coputation of Householder vector, householder reflection c.f. Gene H. Golub, Charles F...
Definition: qr.hpp:195