ViennaCL - The Vienna Computing Library  1.5.2
saxpy.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_GENERATOR_GENERATE_SAXPY_HPP
2 #define VIENNACL_GENERATOR_GENERATE_SAXPY_HPP
3 
4 /* =========================================================================
5  Copyright (c) 2010-2014, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the PDF manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
21 
27 #include <vector>
28 
30 
34 
36 
37 #include "viennacl/tools/tools.hpp"
38 
39 namespace viennacl{
40 
41  namespace generator{
42 
44  class vector_saxpy : public profile_base{
45  public:
46  static std::string csv_format() {
47  return "Vec,LSize1,NumGroups1,GlobalDecomposition";
48  }
49 
50  std::string csv_representation() const{
51  std::ostringstream oss;
52  oss << vector_size_
53  << "," << local_size_1_
54  << "," << num_groups_
55  << "," << decomposition_;
56  return oss.str();
57  }
58 
59  vector_saxpy(unsigned int v, vcl_size_t gs, vcl_size_t ng, unsigned int d) : profile_base(v, gs, 1, 1), num_groups_(ng), decomposition_(d){ }
60 
61  void configure_range_enqueue_arguments(vcl_size_t kernel_id, statements_type const & statements, viennacl::ocl::kernel & k, unsigned int & n_arg) const{
62  configure_local_sizes(k, kernel_id);
63 
64  k.global_work_size(0,local_size_1_*num_groups_);
65  k.global_work_size(1,1);
66 
67  scheduler::statement_node const & first_node = statements.front().second;
68  viennacl::vcl_size_t N = utils::call_on_vector(first_node.lhs, utils::internal_size_fun());
69  k.arg(n_arg++, cl_uint(N/vector_size_));
70  }
71  void kernel_arguments(statements_type const & /*statements*/, std::string & arguments_string) const{
72  arguments_string += detail::generate_value_kernel_argument("unsigned int", "N");
73  }
74 
75  private:
76 
77  void core(vcl_size_t /*kernel_id*/, utils::kernel_generation_stream& stream, statements_type const & statements, std::vector<detail::mapping_type> const & mapping) const {
78  stream << "for(unsigned int i = get_global_id(0) ; i < N ; i += get_global_size(0))" << std::endl;
79  stream << "{" << std::endl;
80  stream.inc_tab();
81 
82  //Fetches entries to registers
83  std::set<std::string> fetched;
84  for(std::vector<detail::mapping_type>::const_iterator it = mapping.begin() ; it != mapping.end() ; ++it)
85  for(detail::mapping_type::const_reverse_iterator iit = it->rbegin() ; iit != it->rend() ; ++iit)
86  //Useless to fetch cpu scalars into registers
87  if(detail::mapped_handle * p = dynamic_cast<detail::mapped_handle *>(iit->second.get()))
88  p->fetch( std::make_pair("i","0"), vector_size_, fetched, stream);
89 
90  //Generates all the expression, in order
91  vcl_size_t i = 0;
92  for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it){
93  std::string str;
94  detail::traverse(it->first, it->second, detail::expression_generation_traversal(std::make_pair("i","0"), -1, str, mapping[i++]));
95  stream << str << ";" << std::endl;
96  }
97 
98  //Writes back
99  for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it)
100  //Gets the mapped object at the LHS of each expression
101  if(detail::mapped_handle * p = dynamic_cast<detail::mapped_handle *>(at(mapping.at(std::distance(statements.begin(),it)), std::make_pair(&it->second, detail::LHS_NODE_TYPE)).get()))
102  p->write_back( std::make_pair("i", "0"), fetched, stream);
103 
104  stream.dec_tab();
105  stream << "}" << std::endl;
106  }
107 
108  private:
109  vcl_size_t num_groups_;
110  unsigned int decomposition_;
111 
112  };
113 
114 
115 
117  class matrix_saxpy : public profile_base{
118 
119  bool invalid_impl(viennacl::ocl::device const & /*dev*/, vcl_size_t /*scalartype_size*/) const{ return false; }
120  bool is_slow_impl(viennacl::ocl::device const &) const { return false; }
121 
122  public:
123  matrix_saxpy(unsigned int v, vcl_size_t gs1, vcl_size_t gs2, vcl_size_t ng1, vcl_size_t ng2, unsigned int d) : profile_base(v, gs1, gs2, 1), num_groups_row_(ng1), num_groups_col_(ng2), decomposition_(d){ }
124 
125  static std::string csv_format() {
126  return "Vec,LSize1,LSize2,NumGroups1,NumGroups2,GlobalDecomposition";
127  }
128 
129  std::string csv_representation() const{
130  std::ostringstream oss;
131  oss << vector_size_
132  << "," << local_size_1_
133  << "," << local_size_2_
134  << "," << num_groups_row_
135  << "," << num_groups_col_
136  << "," << decomposition_;
137  return oss.str();
138  }
139 
140  void configure_range_enqueue_arguments(vcl_size_t kernel_id, statements_type const & statements, viennacl::ocl::kernel & k, unsigned int & n_arg) const{
141  configure_local_sizes(k, kernel_id);
142 
143  k.global_work_size(0,local_size_1_*num_groups_row_);
144  k.global_work_size(1,local_size_2_*num_groups_col_);
145 
146  scheduler::statement_node const & first_node = statements.front().second;
147  k.arg(n_arg++, cl_uint(utils::call_on_matrix(first_node.lhs, utils::internal_size1_fun())));
148  k.arg(n_arg++, cl_uint(utils::call_on_matrix(first_node.lhs, utils::internal_size2_fun())));
149  }
150 
151  void kernel_arguments(statements_type const & /*statements*/, std::string & arguments_string) const{
152  arguments_string += detail::generate_value_kernel_argument("unsigned int", "M");
153  arguments_string += detail::generate_value_kernel_argument("unsigned int", "N");
154  }
155 
156  private:
157  void core(vcl_size_t /*kernel_id*/, utils::kernel_generation_stream& stream, statements_type const & statements, std::vector<detail::mapping_type> const & mapping) const {
158 
159  for(std::vector<detail::mapping_type>::const_iterator it = mapping.begin() ; it != mapping.end() ; ++it){
160  for(detail::mapping_type::const_iterator iit = it->begin() ; iit != it->end() ; ++iit){
161  if(detail::mapped_matrix * p = dynamic_cast<detail::mapped_matrix*>(iit->second.get()))
162  p->bind_sizes("M","N");
163  }
164  }
165 
166  stream << "for(unsigned int i = get_global_id(0) ; i < M ; i += get_global_size(0))" << std::endl;
167  stream << "{" << std::endl;
168  stream.inc_tab();
169  stream << "for(unsigned int j = get_global_id(1) ; j < N ; j += get_global_size(1))" << std::endl;
170  stream << "{" << std::endl;
171  stream.inc_tab();
172 
173  //Fetches entries to registers
174  std::set<std::string> fetched;
175  for(std::vector<detail::mapping_type>::const_iterator it = mapping.begin() ; it != mapping.end() ; ++it)
176  for(detail::mapping_type::const_reverse_iterator it2 = it->rbegin() ; it2 != it->rend() ; ++it2)
177  if(detail::mapped_matrix * p = dynamic_cast<detail::mapped_matrix *>(it2->second.get()))
178  p->fetch(std::make_pair("i", "j"), vector_size_, fetched, stream);
179 
180 
181  vcl_size_t i = 0;
182  for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it){
183  std::string str;
184  detail::traverse(it->first, it->second, detail::expression_generation_traversal(std::make_pair("i", "j"), -1, str, mapping[i++]));
185  stream << str << ";" << std::endl;
186  }
187 
188  //Writes back
189  for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it){
190  if(detail::mapped_handle * p = dynamic_cast<detail::mapped_handle *>(at(mapping.at(std::distance(statements.begin(),it)), std::make_pair(&it->second,detail::LHS_NODE_TYPE)).get()))
191  p->write_back(std::make_pair("i", "j"), fetched, stream);
192  }
193 
194  stream.dec_tab();
195  stream << "}" << std::endl;
196  stream.dec_tab();
197  stream << "}" << std::endl;
198  }
199 
200  private:
201  vcl_size_t num_groups_row_;
202  vcl_size_t num_groups_col_;
203 
204  unsigned int decomposition_;
205  };
206  }
207 
208 }
209 
210 #endif
A stream class where the kernel sources are streamed to. Takes care of indentation of the sources...
Definition: utils.hpp:233
void arg(unsigned int pos, cl_char val)
Sets a char argument at the provided position.
Definition: kernel.hpp:124
void configure_range_enqueue_arguments(vcl_size_t kernel_id, statements_type const &statements, viennacl::ocl::kernel &k, unsigned int &n_arg) const
Configures the range and enqueues the arguments associated with the profile.
Definition: saxpy.hpp:61
std::size_t vcl_size_t
Definition: forwards.h:58
OpenCL kernel generation class for matrix expressions of AXPY type, i.e. A = alpha * B + beta * C...
Definition: saxpy.hpp:117
static std::string csv_format()
Definition: saxpy.hpp:125
Internal utils for a dynamic OpenCL kernel generation.
void configure_range_enqueue_arguments(vcl_size_t kernel_id, statements_type const &statements, viennacl::ocl::kernel &k, unsigned int &n_arg) const
Configures the range and enqueues the arguments associated with the profile.
Definition: saxpy.hpp:140
vector_saxpy(unsigned int v, vcl_size_t gs, vcl_size_t ng, unsigned int d)
Definition: saxpy.hpp:59
Represents an OpenCL kernel within ViennaCL.
Definition: kernel.hpp:59
Base class for an operation profile.
Definition: profile_base.hpp:47
lhs_rhs_element lhs
Definition: forwards.h:422
Various little tools used here and there in ViennaCL.
A class representing a compute device (e.g. a GPU)
Definition: device.hpp:49
Base class for datastructures passed by pointer.
Definition: mapped_objects.hpp:133
Mapping of a matrix to a generator class.
Definition: mapped_objects.hpp:236
Functor for obtaining the internal number of columns of a ViennaCL matrix.
Definition: utils.hpp:188
std::list< std::pair< scheduler::statement, scheduler::statement_node > > statements_type
Definition: profile_base.hpp:49
ValueT const & at(std::map< KeyT, ValueT > const &map, KeyT const &key)
Emulation of C++11's .at() member for std::map<>
Definition: forwards.h:97
static std::string csv_format()
Definition: saxpy.hpp:46
several code generation helpers
OpenCL kernel generation class for vector expressions of AXPY type, i.e. x = alpha * y + beta * z...
Definition: saxpy.hpp:44
Base classes for the profiles.
void kernel_arguments(statements_type const &, std::string &arguments_string) const
Definition: saxpy.hpp:71
Map ViennaCL objects to generator wrappers.
void kernel_arguments(statements_type const &, std::string &arguments_string) const
Definition: saxpy.hpp:151
Functor for obtaining the internal number of rows of a ViennaCL matrix.
Definition: utils.hpp:181
std::string csv_representation() const
csv representation of an operation
Definition: saxpy.hpp:129
void configure_local_sizes(viennacl::ocl::kernel &k, vcl_size_t) const
Definition: profile_base.hpp:59
Provides the datastructures for dealing with a single statement such as 'x = y + z;'.
unsigned int vector_size_
Definition: profile_base.hpp:178
matrix_saxpy(unsigned int v, vcl_size_t gs1, vcl_size_t gs2, vcl_size_t ng1, vcl_size_t ng2, unsigned int d)
Definition: saxpy.hpp:123
vcl_size_t local_size_2_
Definition: profile_base.hpp:180
size_type global_work_size(int index=0) const
Returns the global work size at the respective dimension.
Definition: kernel.hpp:759
vcl_size_t local_size_1_
Definition: profile_base.hpp:179
functor for generating the expression string from a statement
Definition: helpers.hpp:224
std::string csv_representation() const
csv representation of an operation
Definition: saxpy.hpp:50
Main data structure for a node in the statement tree.
Definition: forwards.h:420
Functor for returning the internal size of a vector.
Definition: utils.hpp:167