1 #ifndef VIENNACL_MATRIX_PROXY_HPP_
2 #define VIENNACL_MATRIX_PROXY_HPP_
37 template <
typename MatrixType>
38 class matrix_range :
public matrix_base<typename MatrixType::cpu_value_type, typename MatrixType::orientation_functor>
40 typedef matrix_base<
typename MatrixType::cpu_value_type,
41 typename MatrixType::orientation_functor> base_type;
42 typedef matrix_range<MatrixType> self_type;
55 range const & row_range,
60 using base_type::operator=;
70 template <
typename CPU_MATRIX,
typename SCALARTYPE>
71 void copy(
const CPU_MATRIX & cpu_matrix,
74 assert( (cpu_matrix.size1() == gpu_matrix_range.size1())
75 && (cpu_matrix.size2() == gpu_matrix_range.size2())
76 &&
bool(
"Matrix size mismatch!"));
78 if ( gpu_matrix_range.start2() != 0)
80 std::vector<SCALARTYPE> entries(gpu_matrix_range.size2());
83 for (
vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
85 for (
vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
86 entries[j] = cpu_matrix(i,j);
88 vcl_size_t start_offset = (gpu_matrix_range.start1() + i) * gpu_matrix_range.internal_size2() + gpu_matrix_range.start2();
89 vcl_size_t num_entries = gpu_matrix_range.size2();
97 std::vector<SCALARTYPE> entries(gpu_matrix_range.size1()*gpu_matrix_range.internal_size2());
100 for (
vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
101 for (
vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
102 entries[i*gpu_matrix_range.internal_size2() + j] = cpu_matrix(i,j);
104 vcl_size_t start_offset = gpu_matrix_range.start1() * gpu_matrix_range.internal_size2();
105 vcl_size_t num_entries = gpu_matrix_range.size1() * gpu_matrix_range.internal_size2();
112 template <
typename CPU_MATRIX,
typename SCALARTYPE>
113 void copy(
const CPU_MATRIX & cpu_matrix,
116 assert( (cpu_matrix.size1() == gpu_matrix_range.size1())
117 && (cpu_matrix.size2() == gpu_matrix_range.size2())
118 &&
bool(
"Matrix size mismatch!"));
120 if ( gpu_matrix_range.start1() != 0 || gpu_matrix_range.size1() != gpu_matrix_range.size1())
122 std::vector<SCALARTYPE> entries(gpu_matrix_range.size1());
125 for (
vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
127 for (
vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
128 entries[i] = cpu_matrix(i,j);
130 vcl_size_t start_offset = (gpu_matrix_range.start2() + j) * gpu_matrix_range.internal_size1() + gpu_matrix_range.start1();
131 vcl_size_t num_entries = gpu_matrix_range.size1();
139 std::vector<SCALARTYPE> entries(gpu_matrix_range.internal_size1()*gpu_matrix_range.size2());
142 for (
vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
143 for (
vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
144 entries[i + j*gpu_matrix_range.internal_size1()] = cpu_matrix(i,j);
146 vcl_size_t start_offset = gpu_matrix_range.start2() * gpu_matrix_range.internal_size1();
147 vcl_size_t num_entries = gpu_matrix_range.internal_size1() * gpu_matrix_range.size2();
161 template <
typename CPU_MATRIX,
typename SCALARTYPE>
163 CPU_MATRIX & cpu_matrix)
165 assert( (cpu_matrix.size1() == gpu_matrix_range.size1())
166 && (cpu_matrix.size2() == gpu_matrix_range.size2())
167 &&
bool(
"Matrix size mismatch!"));
169 if ( gpu_matrix_range.start2() != 0)
171 std::vector<SCALARTYPE> entries(gpu_matrix_range.size2());
174 for (
vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
176 vcl_size_t start_offset = (gpu_matrix_range.start1() + i) * gpu_matrix_range.internal_size2() + gpu_matrix_range.start2();
177 vcl_size_t num_entries = gpu_matrix_range.size2();
181 for (
vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
182 cpu_matrix(i,j) = entries[j];
188 std::vector<SCALARTYPE> entries(gpu_matrix_range.size1()*gpu_matrix_range.internal_size2());
190 vcl_size_t start_offset = gpu_matrix_range.start1() * gpu_matrix_range.internal_size2();
191 vcl_size_t num_entries = gpu_matrix_range.size1() * gpu_matrix_range.size2();
195 for (
vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
196 for (
vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
197 cpu_matrix(i,j) = entries[i*gpu_matrix_range.internal_size2() + j];
204 template <
typename CPU_MATRIX,
typename SCALARTYPE>
206 CPU_MATRIX & cpu_matrix)
208 assert( (cpu_matrix.size1() == gpu_matrix_range.size1())
209 && (cpu_matrix.size2() == gpu_matrix_range.size2())
210 &&
bool(
"Matrix size mismatch!"));
212 if ( gpu_matrix_range.start1() != 0)
214 std::vector<SCALARTYPE> entries(gpu_matrix_range.size1());
217 for (
vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
219 vcl_size_t start_offset = (gpu_matrix_range.start2() + j) * gpu_matrix_range.internal_size1() + gpu_matrix_range.start1();
220 vcl_size_t num_entries = gpu_matrix_range.size1();
224 for (
vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
225 cpu_matrix(i,j) = entries[i];
231 std::vector<SCALARTYPE> entries(gpu_matrix_range.internal_size1()*gpu_matrix_range.size2());
234 vcl_size_t start_offset = gpu_matrix_range.start2() * gpu_matrix_range.internal_size1();
235 vcl_size_t num_entries = gpu_matrix_range.internal_size1() * gpu_matrix_range.size2();
239 for (
vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
240 for (
vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
241 cpu_matrix(i,j) = entries[i + j*gpu_matrix_range.internal_size1()];
250 template <
typename MatrixType>
253 assert(r1.
size() <= A.size1() && r2.
size() <= A.size2() && bool(
"Size of range invalid!"));
// project() overload for taking a sub-range of an existing sub-matrix view
// (definition at matrix_proxy.hpp:262 per the member index below).
// NOTE(review): this listing is truncated — only the template header and the
// size assertion survived extraction. The return statement, which presumably
// composes the new range's offsets with those of the existing view, is not
// visible here; consult the repository before relying on its semantics.
259 template <
typename MatrixType>
262 assert(r1.
size() <= A.size1() && r2.
size() <= A.size2() && bool(
"Size of range invalid!"));
289 template <
typename MatrixType>
290 class matrix_slice :
public matrix_base<typename MatrixType::cpu_value_type, typename MatrixType::orientation_functor>
292 typedef matrix_base<
typename MatrixType::cpu_value_type,
293 typename MatrixType::orientation_functor> base_type;
294 typedef matrix_slice<MatrixType> self_type;
307 slice const & row_slice,
312 using base_type::operator=;
323 template <
typename CPU_MATRIX,
typename SCALARTYPE>
324 void copy(
const CPU_MATRIX & cpu_matrix,
327 assert( (cpu_matrix.size1() == gpu_matrix_slice.size1())
328 && (cpu_matrix.size2() == gpu_matrix_slice.size2())
329 &&
bool(
"Matrix size mismatch!"));
331 if ( (gpu_matrix_slice.size1() > 0) && (gpu_matrix_slice.size1() > 0) )
333 vcl_size_t num_entries = gpu_matrix_slice.size2() * gpu_matrix_slice.stride2();
335 std::vector<SCALARTYPE> entries(num_entries);
338 for (
vcl_size_t i=0; i < gpu_matrix_slice.size1(); ++i)
340 vcl_size_t start_offset = (gpu_matrix_slice.start1() + i * gpu_matrix_slice.stride1()) * gpu_matrix_slice.internal_size2() + gpu_matrix_slice.start2();
343 for (
vcl_size_t j=0; j < gpu_matrix_slice.size2(); ++j)
344 entries[j * gpu_matrix_slice.stride2()] = cpu_matrix(i,j);
352 template <
typename CPU_MATRIX,
typename SCALARTYPE>
353 void copy(
const CPU_MATRIX & cpu_matrix,
356 assert( (cpu_matrix.size1() == gpu_matrix_slice.size1())
357 && (cpu_matrix.size2() == gpu_matrix_slice.size2())
358 &&
bool(
"Matrix size mismatch!"));
361 if ( (gpu_matrix_slice.size1() > 0) && (gpu_matrix_slice.size1() > 0) )
363 vcl_size_t num_entries = gpu_matrix_slice.size1() * gpu_matrix_slice.stride1();
365 std::vector<SCALARTYPE> entries(num_entries);
368 for (
vcl_size_t j=0; j < gpu_matrix_slice.size2(); ++j)
370 vcl_size_t start_offset = gpu_matrix_slice.start1() + (gpu_matrix_slice.start2() + j * gpu_matrix_slice.stride2()) * gpu_matrix_slice.internal_size1();
374 for (
vcl_size_t i=0; i < gpu_matrix_slice.size1(); ++i)
375 entries[i * gpu_matrix_slice.stride1()] = cpu_matrix(i,j);
390 template <
typename CPU_MATRIX,
typename SCALARTYPE>
392 CPU_MATRIX & cpu_matrix)
394 assert( (cpu_matrix.size1() == gpu_matrix_slice.size1())
395 && (cpu_matrix.size2() == gpu_matrix_slice.size2())
396 &&
bool(
"Matrix size mismatch!"));
398 if ( (gpu_matrix_slice.size1() > 0) && (gpu_matrix_slice.size1() > 0) )
400 vcl_size_t num_entries = gpu_matrix_slice.size2() * gpu_matrix_slice.stride2();
402 std::vector<SCALARTYPE> entries(num_entries);
405 for (
vcl_size_t i=0; i < gpu_matrix_slice.size1(); ++i)
407 vcl_size_t start_offset = (gpu_matrix_slice.start1() + i * gpu_matrix_slice.stride1()) * gpu_matrix_slice.internal_size2() + gpu_matrix_slice.start2();
411 for (
vcl_size_t j=0; j < gpu_matrix_slice.size2(); ++j)
412 cpu_matrix(i,j) = entries[j * gpu_matrix_slice.stride2()];
420 template <
typename CPU_MATRIX,
typename SCALARTYPE>
422 CPU_MATRIX & cpu_matrix)
424 assert( (cpu_matrix.size1() == gpu_matrix_slice.size1())
425 && (cpu_matrix.size2() == gpu_matrix_slice.size2())
426 &&
bool(
"Matrix size mismatch!"));
428 if ( (gpu_matrix_slice.size1() > 0) && (gpu_matrix_slice.size1() > 0) )
430 vcl_size_t num_entries = gpu_matrix_slice.size1() * gpu_matrix_slice.stride1();
432 std::vector<SCALARTYPE> entries(num_entries);
435 for (
vcl_size_t j=0; j < gpu_matrix_slice.size2(); ++j)
437 vcl_size_t start_offset = gpu_matrix_slice.start1() + (gpu_matrix_slice.start2() + j * gpu_matrix_slice.stride2()) * gpu_matrix_slice.internal_size1();
441 for (
vcl_size_t i=0; i < gpu_matrix_slice.size1(); ++i)
442 cpu_matrix(i,j) = entries[i * gpu_matrix_slice.stride1()];
// project() overload returning a matrix_slice view (definition at
// matrix_proxy.hpp:455 per the member index below).
// NOTE(review): truncated listing — only the template header and the size
// assertion survived extraction; the parameter list (presumably a matrix plus
// two viennacl::slice arguments, cf. the matrix_slice constructor) and the
// return statement are not visible here. Consult the repository.
452 template <
typename MatrixType>
455 assert(r1.
size() <= A.size1() && r2.
size() <= A.size2() && bool(
"Size of slice invalid!"));
// Second project() overload for slices (definition at matrix_proxy.hpp:463
// per the member index below), likely taking an existing sub-matrix view.
// NOTE(review): truncated listing — only the template header and the size
// assertion survived extraction; parameter types and the return statement,
// which would compose the new slice with the existing view's offsets/strides,
// are not visible here. Consult the repository.
460 template <
typename MatrixType>
463 assert(r1.
size() <= A.size1() && r2.
size() <= A.size2() && bool(
"Size of slice invalid!"));
// Third project() overload for slices (definition at matrix_proxy.hpp:474 per
// the member index below), likely slicing an existing matrix_slice.
// NOTE(review): truncated listing — only the template header and the size
// assertion survived extraction; parameter types and the return statement are
// not visible here. Consult the repository.
471 template <
typename MatrixType>
474 assert(r1.
size() <= A.size1() && r2.
size() <= A.size2() && bool(
"Size of slice invalid!"));
MatrixType::orientation_category orientation_category
Definition: matrix_proxy.hpp:297
std::size_t vcl_size_t
Definition: forwards.h:58
void memory_write(mem_handle &dst_buffer, vcl_size_t dst_offset, vcl_size_t bytes_to_write, const void *ptr, bool async=false)
Writes data from main RAM identified by 'ptr' to the buffer identified by 'dst_buffer'.
Definition: memory.hpp:220
basic_slice slice
Definition: forwards.h:344
Implementations of dense matrix related operations including matrix-vector products.
size_type size() const
Definition: slice.hpp:56
Class for representing strided submatrices of a bigger matrix A.
Definition: forwards.h:358
Implementation of the dense matrix class.
value_type reference
Definition: matrix_proxy.hpp:51
viennacl::result_of::cpu_value_type< value_type >::type cpu_value_type
Definition: matrix_proxy.hpp:48
matrix_range< MatrixType > project(MatrixType &A, viennacl::range const &r1, viennacl::range const &r2)
Definition: matrix_proxy.hpp:251
const value_type & const_reference
Definition: matrix_proxy.hpp:304
size_type internal_size1() const
Returns the internal number of rows. Usually required for launching OpenCL kernels only...
Definition: matrix.hpp:647
const value_type & const_reference
Definition: matrix_proxy.hpp:52
range::difference_type difference_type
Definition: matrix_proxy.hpp:50
A dense matrix class.
Definition: forwards.h:290
result_of::size_type< viennacl::vector_base< T > >::type stride(viennacl::vector_base< T > const &s)
Definition: stride.hpp:46
This file provides the forward declarations for the main types used within ViennaCL.
size_type start() const
Definition: slice.hpp:54
A dense matrix class.
Definition: forwards.h:293
void memory_read(mem_handle const &src_buffer, vcl_size_t src_offset, vcl_size_t bytes_to_read, void *ptr, bool async=false)
Reads data from a buffer back to main RAM.
Definition: memory.hpp:261
MatrixType::orientation_category orientation_category
Definition: matrix_proxy.hpp:45
viennacl::result_of::cpu_value_type< value_type >::type cpu_value_type
Definition: matrix_proxy.hpp:300
MatrixType::value_type value_type
Definition: matrix_proxy.hpp:299
MatrixType::value_type value_type
Definition: matrix_proxy.hpp:47
matrix_range(MatrixType &A, range const &row_range, range const &col_range)
Definition: matrix_proxy.hpp:54
matrix_slice(MatrixType &A, slice const &row_slice, slice const &col_slice)
Definition: matrix_proxy.hpp:306
range::size_type size_type
Definition: matrix_proxy.hpp:49
difference_type stride() const
Definition: slice.hpp:55
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:144
size_type start() const
Definition: range.hpp:55
result_of::size_type< T >::type start(T const &obj)
Definition: start.hpp:43
SizeType size_type
Definition: range.hpp:42
void copy(std::vector< SCALARTYPE > &cpu_vec, circulant_matrix< SCALARTYPE, ALIGNMENT > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
Definition: circulant_matrix.hpp:150
value_type reference
Definition: matrix_proxy.hpp:303
size_type size() const
Definition: range.hpp:56
matrix_base()
The default constructor. Does not allocate any memory.
Definition: matrix.hpp:255
T::ERROR_CANNOT_DEDUCE_CPU_SCALAR_TYPE_FOR_T type
Definition: result_of.hpp:276
range::difference_type difference_type
Definition: matrix_proxy.hpp:302
DistanceType difference_type
Definition: range.hpp:43
A range class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded.
Definition: forwards.h:339
handle_type & handle()
Returns the OpenCL handle, non-const-version.
Definition: matrix.hpp:654
Implementation of a range object for use with proxy objects.
basic_range range
Definition: forwards.h:339
Class for representing non-strided submatrices of a bigger matrix A.
Definition: forwards.h:355
A slice class that refers to a strided index set {start, start + stride, start + 2*stride, ...} of given size within an interval [start, stop), where 'start' is included, and 'stop' is excluded.
Definition: forwards.h:344
range::size_type size_type
Definition: matrix_proxy.hpp:301
size_type internal_size2() const
Returns the internal number of columns. Usually required for launching OpenCL kernels only...
Definition: matrix.hpp:649