1 #ifndef VIENNACL_GENERATOR_GENERATE_TEMPLATE_BASE_BASE
2 #define VIENNACL_GENERATOR_GENERATE_TEMPLATE_BASE_BASE
49 typedef std::list< std::pair<scheduler::statement, scheduler::statement_node> >
statements_type;
64 virtual void print(std::ostream & s)
const{
101 if(dev.
type()==CL_DEVICE_TYPE_GPU){
124 return invalid_work_group_sizes
125 ||
lmem_used(scalartype_size)>lmem_available
139 std::vector<detail::mapping_type> mapping(statements.size());
142 std::string prototype;
143 std::set<std::string> already_generated;
147 std::map<void *, vcl_size_t> memory;
148 unsigned int current_arg = 0;
150 for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it)
151 detail::traverse(it->first, it->second,
detail::map_functor(memory,current_arg,mapping[i++]));
154 for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it){
158 prototype.erase(prototype.size()-1);
164 stream <<
"__kernel " <<
"void " <<
"kernel_" << device_offset <<
"_" << n <<
"(" << std::endl;
165 stream << prototype << std::endl;
166 stream <<
")" << std::endl;
169 stream <<
"{" << std::endl;
171 core(n, stream, statements, mapping);
173 stream <<
"}" << std::endl;
friend std::ostream & operator<<(std::ostream &, profile_base const &)
Definition: profile_base.hpp:185
A stream class where the kernel sources are streamed to. Takes care of indentation of the sources...
Definition: utils.hpp:233
cl_ulong local_mem_size() const
Size of local memory arena in bytes. The minimum value is 32 KB.
Definition: device.hpp:358
virtual void operator()(utils::kernel_generation_stream &stream, vcl_size_t device_offset, statements_type const &statements) const
Generates the code associated with this profile onto the provided stream. Redirects to the virtual cor...
Definition: profile_base.hpp:138
std::size_t vcl_size_t
Definition: forwards.h:58
vcl_size_t num_kernels_
Definition: profile_base.hpp:181
Represents an OpenCL device within ViennaCL.
size_t max_work_group_size() const
Maximum number of work-items in a work-group executing a kernel using the data parallel execution mod...
Definition: device.hpp:481
Represents an OpenCL kernel within ViennaCL.
Definition: kernel.hpp:59
Base class for an operation profile.
Definition: profile_base.hpp:47
virtual ~profile_base()
The destructor.
Definition: profile_base.hpp:82
A class representing a compute device (e.g. a GPU)
Definition: device.hpp:49
virtual bool is_slow_impl(viennacl::ocl::device const &) const
Definition: profile_base.hpp:55
Functor to map the statements to the types defined in mapped_objects.hpp.
virtual std::string csv_representation() const =0
CSV representation of an operation
std::list< std::pair< scheduler::statement, scheduler::statement_node > > statements_type
Definition: profile_base.hpp:49
several code generation helpers
Implementation of convenience functions to get information.
std::vector< size_t > max_work_item_sizes() const
Maximum number of work-items that can be specified in each dimension of the work-group.
Definition: device.hpp:508
virtual void core(vcl_size_t kernel_id, utils::kernel_generation_stream &stream, statements_type const &statements, std::vector< detail::mapping_type > const &mapping) const =0
Generates the body of the associated kernel function.
functor for generating the prototype of a statement
Definition: helpers.hpp:152
virtual void configure_range_enqueue_arguments(vcl_size_t kernel_id, statements_type const &statements, viennacl::ocl::kernel &k, unsigned int &n_arg) const =0
Configures the range and enqueues the arguments associated with the profile.
Functor to map the statements to the types defined in mapped_objects.hpp.
Definition: map_functor.hpp:47
cl_uint vendor_id() const
A unique device vendor identifier. An example of a unique device identifier could be the PCIe ID...
Definition: device.hpp:897
Various utility implementations for dispatching with respect to the different devices available on th...
void configure_local_sizes(viennacl::ocl::kernel &k, vcl_size_t) const
Definition: profile_base.hpp:59
std::ostream & operator<<(std::ostream &os, profile_base const &profile)
Definition: profile_base.hpp:185
void dec_tab()
Definition: utils.hpp:259
Provides the datastructures for dealing with a single statement such as 'x = y + z;'.
Implementations of the OpenCL backend, in which all contexts are stored.
virtual void kernel_arguments(statements_type const &statements, std::string &arguments_string) const =0
unsigned int vector_size_
Definition: profile_base.hpp:178
unsigned int vector_size() const
Get the vector size of the kernel.
Definition: profile_base.hpp:90
virtual void print(std::ostream &s) const
Definition: profile_base.hpp:64
Representation of an OpenCL kernel in ViennaCL.
bool is_slow(viennacl::ocl::device const &dev) const
Returns whether or not the profile is likely to be slow on a particular device
Definition: profile_base.hpp:99
void inc_tab()
Definition: utils.hpp:257
vcl_size_t num_kernels() const
Returns the number of kernels needed by this operation.
Definition: profile_base.hpp:130
virtual bool invalid_impl(viennacl::ocl::device const &, vcl_size_t) const
Definition: profile_base.hpp:54
vcl_size_t local_size_2_
Definition: profile_base.hpp:180
virtual vcl_size_t lmem_used(vcl_size_t) const
Definition: profile_base.hpp:57
profile_base(unsigned int vectorization, vcl_size_t local_size_1, vcl_size_t local_size_2, vcl_size_t num_kernels)
The constructor.
Definition: profile_base.hpp:79
cl_device_type type() const
The OpenCL device type.
Definition: device.hpp:873
vcl_size_t local_size_1_
Definition: profile_base.hpp:179
size_type local_work_size(int index=0) const
Returns the local work size at the respective dimension.
Definition: kernel.hpp:750
bool is_invalid(viennacl::ocl::device const &dev, vcl_size_t scalartype_size) const
Returns whether or not the profile leads to undefined behavior on a particular device ...
Definition: profile_base.hpp:114