ViennaCL - The Vienna Computing Library  1.5.2
cuda.hpp
#ifndef VIENNACL_BACKEND_CUDA_HPP_
#define VIENNACL_BACKEND_CUDA_HPP_

/* =========================================================================
   Copyright (c) 2010-2014, Institute for Microelectronics,
                            Institute for Analysis and Scientific Computing,
                            TU Wien.
   Portions of this software are copyright by UChicago Argonne, LLC.

                            -----------------
                  ViennaCL - The Vienna Computing Library
                            -----------------

   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at

   (A list of authors and contributors can be found in the PDF manual)

   License:         MIT (X11), see file LICENSE in the base directory
============================================================================= */

#include <iostream>
#include <vector>
#include <cassert>
#include "viennacl/tools/shared_ptr.hpp"

// includes CUDA
#include <cuda_runtime.h>

#define VIENNACL_CUDA_ERROR_CHECK(err) detail::cuda_error_check(err, __FILE__, __LINE__)

namespace viennacl
{
  namespace backend
  {
    namespace cuda
    {
      typedef viennacl::tools::shared_ptr<char>  handle_type;

      // Requirements for backend:
      //
      // * memory_create(size, host_ptr)
      // * memory_copy(src, dest, offset_src, offset_dest, size)
      // * memory_write_from_main_memory(src, offset, size,
      //                                 dest, offset, size)
      // * memory_read_to_main_memory(src, offset, size,
      //                              dest, offset, size)
      //

      namespace detail
      {

        inline void cuda_error_check(cudaError error_code, const char * file, const int line)
        {
          if (cudaSuccess != error_code)
          {
            std::cerr << file << "(" << line << "): CUDA Runtime API error " << error_code << ": " << cudaGetErrorString(error_code) << std::endl;
            throw "CUDA error";
          }
        }

        /** @brief Functor for deleting a CUDA handle. Used within the smart pointer class. */
        template <typename U>
        struct cuda_deleter
        {
          void operator()(U * p) const
          {
            //std::cout << "Freeing handle " << reinterpret_cast<void *>(p) << std::endl;
            cudaFree(p);
          }
        };

      } // namespace detail

      /** @brief Creates an array of the specified size on the CUDA device. If the second argument is provided, the new buffer is initialized with the data it points to.
      *
      * @param size_in_bytes   Number of bytes to allocate on the device
      * @param host_ptr        Optional pointer to host data used to initialize the new buffer
      */
      inline handle_type memory_create(vcl_size_t size_in_bytes, const void * host_ptr = NULL)
      {
        void * dev_ptr = NULL;
        VIENNACL_CUDA_ERROR_CHECK( cudaMalloc(&dev_ptr, size_in_bytes) );
        //std::cout << "Allocated new dev_ptr " << dev_ptr << " of size " << size_in_bytes << std::endl;

        if (!host_ptr)
          return handle_type(reinterpret_cast<char *>(dev_ptr), detail::cuda_deleter<char>());

        handle_type new_handle(reinterpret_cast<char *>(dev_ptr), detail::cuda_deleter<char>());

        // copy data:
        //std::cout << "Filling new handle from host_ptr " << host_ptr << std::endl;
        cudaMemcpy(new_handle.get(), host_ptr, size_in_bytes, cudaMemcpyHostToDevice);

        return new_handle;
      }

      /** @brief Copies 'bytes_to_copy' bytes from address 'src_buffer + src_offset' on the CUDA device to memory starting at address 'dst_buffer + dst_offset' on the same CUDA device.
      *
      * @param src_buffer      Handle to the source buffer on the device
      * @param dst_buffer      Handle to the destination buffer on the device
      * @param src_offset      Offset (in bytes) into the source buffer
      * @param dst_offset      Offset (in bytes) into the destination buffer
      * @param bytes_to_copy   Number of bytes to be copied
      */
      inline void memory_copy(handle_type const & src_buffer,
                              handle_type & dst_buffer,
                              vcl_size_t src_offset,
                              vcl_size_t dst_offset,
                              vcl_size_t bytes_to_copy)
      {
        assert( (dst_buffer.get() != NULL) && bool("Memory not initialized!"));
        assert( (src_buffer.get() != NULL) && bool("Memory not initialized!"));

        cudaMemcpy(reinterpret_cast<void *>(dst_buffer.get() + dst_offset),
                   reinterpret_cast<void *>(src_buffer.get() + src_offset),
                   bytes_to_copy,
                   cudaMemcpyDeviceToDevice);
      }

      /** @brief Writes data from main RAM identified by 'ptr' to the CUDA buffer identified by 'dst_buffer'.
      *
      * @param dst_buffer      Handle to the destination buffer on the device
      * @param dst_offset      Offset (in bytes) into the destination buffer
      * @param bytes_to_copy   Number of bytes to be written
      * @param ptr             Pointer to the host data
      * @param async           If true, the transfer is issued asynchronously via cudaMemcpyAsync()
      */
      inline void memory_write(handle_type & dst_buffer,
                               vcl_size_t dst_offset,
                               vcl_size_t bytes_to_copy,
                               const void * ptr,
                               bool async = false)
      {
        assert( (dst_buffer.get() != NULL) && bool("Memory not initialized!"));

        if (async)
          cudaMemcpyAsync(reinterpret_cast<char *>(dst_buffer.get()) + dst_offset,
                          reinterpret_cast<const char *>(ptr),
                          bytes_to_copy,
                          cudaMemcpyHostToDevice);
        else
          cudaMemcpy(reinterpret_cast<char *>(dst_buffer.get()) + dst_offset,
                     reinterpret_cast<const char *>(ptr),
                     bytes_to_copy,
                     cudaMemcpyHostToDevice);
      }

      /** @brief Reads data from a CUDA buffer back to main RAM.
      *
      * @param src_buffer      Handle to the source buffer on the device
      * @param src_offset      Offset (in bytes) into the source buffer
      * @param bytes_to_copy   Number of bytes to be read
      * @param ptr             Pointer to the host memory receiving the data
      * @param async           If true, the transfer is issued asynchronously via cudaMemcpyAsync()
      */
      inline void memory_read(handle_type const & src_buffer,
                              vcl_size_t src_offset,
                              vcl_size_t bytes_to_copy,
                              void * ptr,
                              bool async = false)
      {
        assert( (src_buffer.get() != NULL) && bool("Memory not initialized!"));

        if (async)
          cudaMemcpyAsync(reinterpret_cast<char *>(ptr),
                          reinterpret_cast<char *>(src_buffer.get()) + src_offset,
                          bytes_to_copy,
                          cudaMemcpyDeviceToHost);
        else
          cudaMemcpy(reinterpret_cast<char *>(ptr),
                     reinterpret_cast<char *>(src_buffer.get()) + src_offset,
                     bytes_to_copy,
                     cudaMemcpyDeviceToHost);
      }

    } //cuda
  } //backend
} //viennacl
#endif
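
The functions above form the complete low-level CUDA memory backend: memory_create allocates (and optionally initializes) a device buffer wrapped in a reference-counted handle, memory_copy moves bytes between device buffers, and memory_write/memory_read transfer data between host and device. The following is a minimal usage sketch, not part of cuda.hpp itself; it assumes the header is reachable as "viennacl/backend/cuda.hpp" (as the namespace layout suggests), that the translation unit is compiled with nvcc against the CUDA runtime, and that a CUDA-capable device is present. Buffer sizes and values are arbitrary illustration data.

// Minimal usage sketch (illustrative only, not part of cuda.hpp).
// Assumptions: ViennaCL headers on the include path, compiled with nvcc,
// CUDA-capable device available.
#include <cstddef>
#include <iostream>
#include <vector>

#include "viennacl/backend/cuda.hpp"

int main()
{
  using namespace viennacl::backend::cuda;

  std::vector<float> host_data(16, 1.0f);
  std::size_t num_bytes = host_data.size() * sizeof(float);

  // Allocate a device buffer and initialize it from host memory:
  handle_type buffer1 = memory_create(num_bytes, &host_data[0]);

  // Allocate a second (uninitialized) device buffer and copy device-to-device:
  handle_type buffer2 = memory_create(num_bytes);
  memory_copy(buffer1, buffer2, 0, 0, num_bytes);

  // Overwrite the first half of buffer2 from host memory (synchronously):
  memory_write(buffer2, 0, num_bytes / 2, &host_data[0]);

  // Read the full buffer back to host memory:
  std::vector<float> result(host_data.size());
  memory_read(buffer2, 0, num_bytes, &result[0]);

  std::cout << "result[0] = " << result[0] << std::endl;
  return 0;  // buffers are freed via detail::cuda_deleter when the handles go out of scope
}

Because handle_type is a viennacl::tools::shared_ptr<char> constructed with detail::cuda_deleter, cudaFree() is invoked automatically once the last handle referring to a buffer is destroyed; no manual deallocation calls are needed.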