1 #ifndef VIENNACL_LINALG_MIXED_PRECISION_CG_HPP_
2 #define VIENNACL_LINALG_MIXED_PRECISION_CG_HPP_
68 unsigned int iters()
const {
return iters_taken_; }
69 void iters(
unsigned int i)
const { iters_taken_ = i; }
72 double error()
const {
return last_error_; }
74 void error(
double e)
const { last_error_ = e; }
79 unsigned int iterations_;
83 mutable unsigned int iters_taken_;
84 mutable double last_error_;
89 "#if defined(cl_khr_fp64)\n"
90 "# pragma OPENCL EXTENSION cl_khr_fp64: enable\n"
91 "#elif defined(cl_amd_fp64)\n"
92 "# pragma OPENCL EXTENSION cl_amd_fp64: enable\n"
94 "__kernel void assign_double_to_float(\n"
95 " __global float * vec1,\n"
96 " __global const double * vec2, \n"
97 " unsigned int size) \n"
99 " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))\n"
100 " vec1[i] = (float)(vec2[i]);\n"
102 "__kernel void inplace_add_float_to_double(\n"
103 " __global double * vec1,\n"
104 " __global const float * vec2, \n"
105 " unsigned int size) \n"
107 " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))\n"
108 " vec1[i] += (double)(vec2[i]);\n"
121 template <
typename MatrixType,
typename VectorType>
132 VectorType result(rhs);
135 VectorType residual = rhs;
138 CPU_ScalarType new_ip_rr = 0;
139 CPU_ScalarType norm_rhs_squared = ip_rr;
141 if (norm_rhs_squared == 0)
144 static bool first =
true;
156 float inner_ip_rr =
static_cast<float>(ip_rr);
157 float new_inner_ip_rr = 0;
158 float initial_inner_rhs_norm_squared =
static_cast<float>(ip_rr);
167 rhs.handle().opencl_handle(),
172 residual_low_precision = p_low_precision;
180 matrix.handle().opencl_handle(),
181 cl_uint(matrix.nnz())
197 result_low_precision += alpha * p_low_precision;
198 residual_low_precision -= alpha * tmp_low_precision;
202 beta = new_inner_ip_rr / inner_ip_rr;
203 inner_ip_rr = new_inner_ip_rr;
205 p_low_precision = residual_low_precision + beta * p_low_precision;
214 result_low_precision.
handle().opencl_handle(),
215 cl_uint(result.size())
220 residual = rhs - residual;
228 residual.handle().opencl_handle(),
229 cl_uint(residual.size())
231 result_low_precision.
clear();
232 residual_low_precision = p_low_precision;
233 initial_inner_rhs_norm_squared =
static_cast<float>(new_ip_rr);
234 inner_ip_rr =
static_cast<float>(new_ip_rr);
239 tag.
error(std::sqrt(new_ip_rr / norm_rhs_squared));
244 template <
typename MatrixType,
typename VectorType>
247 return solve(matrix, rhs, tag);
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context.
Definition: context.hpp:470
std::size_t vcl_size_t
Definition: forwards.h:58
Generic size and resize functionality for different vector and matrix types.
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
Represents an OpenCL kernel within ViennaCL.
Definition: kernel.hpp:59
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:51
viennacl::ocl::program & add_program(cl_program p, std::string const &prog_name)
Adds a program to the context.
Definition: context.hpp:340
void clear(VectorType &vec)
Generic routine for setting all entries of a vector to zero. This is the version for non-ViennaCL obj...
Definition: clear.hpp:57
This file provides the forward declarations for the main types used within ViennaCL.
A dense matrix class.
Definition: forwards.h:293
viennacl::enable_if< viennacl::is_stl< typename viennacl::traits::tag_of< VectorT1 >::type >::value, typename VectorT1::value_type >::type inner_prod(VectorT1 const &v1, VectorT2 const &v2)
Definition: inner_prod.hpp:89
Generic interface for the computation of inner products. See viennacl/linalg/vector_operations.hpp for implementations.
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
Definition: prod.hpp:91
void clear()
Resets all entries to zero. Does not change the size of the vector.
Definition: vector.hpp:863
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:144
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Definition: enqueue.hpp:48
const char * double_float_conversion_program
Definition: mixed_precision_cg.hpp:88
A tag class representing the use of no preconditioner.
Definition: forwards.h:727
Implementations of incomplete factorization preconditioners. Convenience header file.
T::value_type type
Definition: result_of.hpp:230
Generic clear functionality for different vector and matrix types.
unsigned int iters() const
Return the number of solver iterations:
Definition: mixed_precision_cg.hpp:68
A vector class representing a linear memory sequence on the GPU. Inspired by boost::numeric::ublas::v...
Definition: forwards.h:208
unsigned int max_iterations() const
Returns the maximum number of iterations.
Definition: mixed_precision_cg.hpp:65
Implementations of the OpenCL backend, where all contexts are stored in.
Proxy classes for vectors.
mixed_precision_cg_tag(double tol=1e-8, unsigned int max_iterations=300, float inner_tol=1e-2f)
The constructor.
Definition: mixed_precision_cg.hpp:58
double tolerance() const
Returns the relative tolerance.
Definition: mixed_precision_cg.hpp:61
void memory_copy(mem_handle const &src_buffer, mem_handle &dst_buffer, vcl_size_t src_offset, vcl_size_t dst_offset, vcl_size_t bytes_to_copy)
Copies 'bytes_to_copy' bytes from address 'src_buffer + src_offset' to memory starting at address 'ds...
Definition: memory.hpp:140
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object.
Definition: context.hpp:41
Representation of an OpenCL kernel in ViennaCL.
T::ERROR_CANNOT_DEDUCE_CPU_SCALAR_TYPE_FOR_T type
Definition: result_of.hpp:276
double error() const
Returns the estimated relative error at the end of the solver run.
Definition: mixed_precision_cg.hpp:72
float inner_tolerance() const
Returns the relative tolerance.
Definition: mixed_precision_cg.hpp:63
Main abstraction class for multiple memory domains. Represents a buffer in either main RAM...
Definition: mem_handle.hpp:62
void iters(unsigned int i) const
Definition: mixed_precision_cg.hpp:69
const handle_type & handle() const
Returns the memory handle.
Definition: vector.hpp:856
VectorType solve(const MatrixType &matrix, VectorType const &rhs, bicgstab_tag const &tag)
Implementation of the stabilized Bi-conjugate gradient solver.
Definition: bicgstab.hpp:92
A tag for the conjugate gradient Used for supplying solver parameters and for dispatching the solve()...
Definition: mixed_precision_cg.hpp:49
void error(double e) const
Sets the estimated relative error at the end of the solver run.
Definition: mixed_precision_cg.hpp:74
A collection of compile time type deductions.
Main interface routines for memory management.