1 #ifndef VIENNACL_GENERATOR_AUTOTUNE_HPP
2 #define VIENNACL_GENERATOR_AUTOTUNE_HPP
/** @brief Returns true if the parameter has reached its maximum value, i.e. current_ is the index of the last element of values_.
 *
 *  NOTE(review): values_.size()-1 is computed in unsigned arithmetic, so it wraps around when values_ is empty; confirm that callers never construct an empty parameter.
 */
58 bool is_max()
const {
return current_ == (values_.size()-1); }
63 if(current_ < values_.size() )
/** @brief Returns the current value of the parameter: the element of values_ at index current_.
 *
 *  The element is read with operator[], so no bounds check is performed here.
 */
70 int current()
const{
return values_[current_]; }
76 std::vector<int> values_;
77 unsigned int current_;
88 template<
class ConfigType>
92 typedef std::map<std::string, viennacl::generator::autotune::tuning_param> params_t;
102 params_.insert(std::make_pair(name,values));
108 for(
typename params_t::const_iterator it = params_.begin() ; it != params_.end() ; ++it)
109 res = res || !it->second.is_max();
115 for(
typename params_t::iterator it = params_.begin() ; it != params_.end() ; ++it)
116 if(it->second.inc()==
false)
122 return config_type::is_invalid(dev,params_);
127 return config_type::create_profile(params_);
132 for(params_t::iterator it = params_.begin() ; it != params_.end() ; ++it){
142 template<
class ProfileT>
146 std::list<viennacl::ocl::kernel *> kernels;
149 gen.
add(statement, statement.
array()[0]);
155 for(
unsigned int i = 0 ; i < n_runs ; ++i)
158 return (
double)t.
get()/n_runs;
173 template<
class ConfigType>
176 unsigned int n_conf = 0;
179 typename ConfigType::profile_type
const & profile = config.
get_current();
180 if(config.
is_invalid(dev) || profile.is_slow(dev))
189 typename ConfigType::profile_type
const & profile = config.
get_current();
190 if(config.
is_invalid(dev) || profile.is_slow(dev))
192 double percent = (double)n++*100/n_conf;
194 timings->insert(std::make_pair(exec_time, profile));
195 std::cout <<
'\r' <<
"Autotuning..." <<
"[" << std::setprecision(2) << std::setfill (
' ') << std::setw(6) << std::fixed << percent <<
"%" <<
"]"
196 <<
" | Best : " << timings->begin()->second <<
" => " << std::scientific << std::right << std::setprecision(2) << timings->begin()->first << std::flush;
198 *out << std::setprecision(3) << std::scientific << exec_time <<
"," << profile.csv_representation() << std::endl ;
200 std::cout <<
'\r' <<
"Autotuning..." <<
"[100.00%]" << std::endl;
208 #endif // AUTOTUNE_HPP
config_type::profile_type get_current()
Returns the current profile.
Definition: autotune.hpp:126
bool has_next() const
Returns true if the tuning config has not yet explored all its possibilities.
Definition: autotune.hpp:106
viennacl::ocl::program & get_configured_program(viennacl::generator::code_generator const &generator, std::list< viennacl::ocl::kernel * > &kernels, bool force_recompilation=false)
Creates the program associated with a generator object and fills the kernels. Checks the context for ...
Definition: generate.hpp:351
void reset()
Resets the parameter to its minimum value.
Definition: autotune.hpp:73
void enqueue(viennacl::generator::code_generator const &generator, bool force_recompilation=false)
Set the arguments and enqueue a generator object.
Definition: generate.hpp:372
bool is_invalid(viennacl::ocl::device const &dev) const
Returns true if the compilation/execution of the underlying profile has an undefined behavior...
Definition: autotune.hpp:121
void force_profile(forced_profile_key_type key, T const &t)
Force the generator to use a specific profile for an operation.
Definition: generate.hpp:225
void finish()
Synchronizes the execution. finish() will only return after all compute kernels (CUDA, OpenCL) have completed.
Definition: memory.hpp:54
A class representing a compute device (e.g. a GPU)
Definition: device.hpp:49
class for a tuning parameter
Definition: autotune.hpp:48
the user interface for the code generator
tuning_param(std::vector< int > const &values)
The constructor.
Definition: autotune.hpp:55
viennacl::ocl::device const & current_device()
Convenience function for returning the active device in the current context.
Definition: backend.hpp:330
double benchmark_impl(viennacl::scheduler::statement const &statement, code_generator::forced_profile_key_type key, ProfileT const &prof, unsigned int n_runs)
Add the timing value for a given profile and a statement.
Definition: autotune.hpp:143
Implementation of convenience functions to get infos.
Tuning configuration.
Definition: autotune.hpp:89
bool inc()
Increments the parameter.
Definition: autotune.hpp:61
void update()
Update the parameters of the config.
Definition: autotune.hpp:114
void add_tuning_param(std::string const &name, std::vector< int > const &values)
Add a tuning parameter to the config.
Definition: autotune.hpp:101
Provides the data structures for dealing with a single statement such as 'x = y + z;'.
void benchmark(std::map< double, typename ConfigType::profile_type > *timings, scheduler::statement const &op, code_generator::forced_profile_key_type const &key, tuning_config< ConfigType > &config, unsigned int n_runs, std::ofstream *out)
Fills a timing map for a given statement and a benchmark configuration.
Definition: autotune.hpp:174
int current() const
Returns the current value of the parameter.
Definition: autotune.hpp:70
A simple, yet (mostly) sufficiently accurate timer for benchmarking and profiling.
container_type const & array() const
Definition: forwards.h:473
Representation of an OpenCL kernel in ViennaCL.
Class for handling code generation.
Definition: generate.hpp:47
bool add(scheduler::statement const &statement, scheduler::statement_node const &root_node)
Add a statement and the root node to the expression list.
Definition: generate.hpp:232
config_type::profile_type profile_type
Accessor for profile_type.
Definition: autotune.hpp:98
The main class for representing a statement such as x = inner_prod(y,z); at runtime.
Definition: forwards.h:447
void reset()
Reset the config.
Definition: autotune.hpp:131
std::pair< expression_type, vcl_size_t > forced_profile_key_type
typedef of the key used in the forced profiles. Contains the expression type and the size of the scal...
Definition: generate.hpp:50
bool is_max() const
Returns true if the parameter has reached its maximum value.
Definition: autotune.hpp:58
ConfigType config_type
Definition: autotune.hpp:95