Reference documentation for deal.II version 8.1.0
vectorization.h
1 // ---------------------------------------------------------------------
2 // @f$Id: vectorization.h 31259 2013-10-16 14:43:14Z kronbichler @f$
3 //
4 // Copyright (C) 2011 - 2013 by the deal.II authors
5 //
6 // This file is part of the deal.II library.
7 //
8 // The deal.II library is free software; you can use it, redistribute
9 // it, and/or modify it under the terms of the GNU Lesser General
10 // Public License as published by the Free Software Foundation; either
11 // version 2.1 of the License, or (at your option) any later version.
12 // The full text of the license can be found in the file LICENSE at
13 // the top level of the deal.II distribution.
14 //
15 // ---------------------------------------------------------------------
16 
17 
18 #ifndef __deal2__vectorization_h
19 #define __deal2__vectorization_h
20 
21 #include <deal.II/base/config.h>
22 #include <deal.II/base/std_cxx1x/type_traits.h>
24 #include <deal.II/base/memory_consumption.h>
25 #include <deal.II/base/parallel.h>
26 
27 #include <cmath>
28 #include <cstring>
29 
30 // Note:
31 // The flag DEAL_II_COMPILER_VECTORIZATION_LEVEL is essentially constructed
32 // according to the following scheme
33 // #ifdef __AVX__
34 // #define DEAL_II_COMPILER_VECTORIZATION_LEVEL 2
35 // #elif defined (__SSE2__)
36 // #define DEAL_II_COMPILER_VECTORIZATION_LEVEL 1
37 // #else
38 // #define DEAL_II_COMPILER_VECTORIZATION_LEVEL 0
39 // #endif
40 // In addition to checking the flags __AVX__ and __SSE2__, a configure test
41 // ensures that these feature are not only present but also working properly.
42 
43 #if DEAL_II_COMPILER_VECTORIZATION_LEVEL == 2 // AVX
44 #include <immintrin.h>
45 #include <mm_malloc.h>
46 #elif DEAL_II_COMPILER_VECTORIZATION_LEVEL == 1 // SSE2
47 #include <emmintrin.h>
48 #include <mm_malloc.h>
49 #endif
50 
51 
52 
53 // forward declarations
54 namespace dealii
55 {
56  template <typename Number> class VectorizedArray;
57 }
58 namespace std
59 {
60  template <typename Number> ::VectorizedArray<Number>
61  sqrt(const ::VectorizedArray<Number> &);
62  template <typename Number> ::VectorizedArray<Number>
63  abs(const ::VectorizedArray<Number> &);
64  template <typename Number> ::VectorizedArray<Number>
65  max(const ::VectorizedArray<Number> &, const ::VectorizedArray<Number> &);
66  template <typename Number> ::VectorizedArray<Number>
67  min (const ::VectorizedArray<Number> &, const ::VectorizedArray<Number> &);
68 }
69 
70 
72 
73 
74 // for safety, also check that __AVX__ is defined in case the user manually
75 // set some conflicting compile flags which prevent compilation
76 
77 #if DEAL_II_COMPILER_VECTORIZATION_LEVEL == 2 && defined(__AVX__)
78 
82 template <>
84 {
85 public:
90  static const unsigned int n_array_elements = 4;
91 
97  operator = (const double x)
98  {
99  data = _mm256_set1_pd(x);
100  return *this;
101  }
102 
106  double &
107  operator [] (const unsigned int comp)
108  {
109  AssertIndexRange (comp, 4);
110  return *(reinterpret_cast<double *>(&data)+comp);
111  }
112 
116  const double &
117  operator [] (const unsigned int comp) const
118  {
119  AssertIndexRange (comp, 4);
120  return *(reinterpret_cast<const double *>(&data)+comp);
121  }
122 
127  operator += (const VectorizedArray &vec)
128  {
129  // if the compiler supports vector
130  // arithmetics, we can simply use += operator
131  // on the given data type. Otherwise, we need
132  // to use the built-in intrinsic command for
133  // __m256d
134 #ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
135  data += vec.data;
136 #else
137  data = _mm256_add_pd(data,vec.data);
138 #endif
139  return *this;
140  }
141 
146  operator -= (const VectorizedArray &vec)
147  {
148 #ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
149  data -= vec.data;
150 #else
151  data = _mm256_sub_pd(data,vec.data);
152 #endif
153  return *this;
154  }
159  operator *= (const VectorizedArray &vec)
160  {
161 #ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
162  data *= vec.data;
163 #else
164  data = _mm256_mul_pd(data,vec.data);
165 #endif
166  return *this;
167  }
168 
173  operator /= (const VectorizedArray &vec)
174  {
175 #ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
176  data /= vec.data;
177 #else
178  data = _mm256_div_pd(data,vec.data);
179 #endif
180  return *this;
181  }
182 
188  __m256d data;
189 
190 private:
196  get_sqrt () const
197  {
198  VectorizedArray res;
199  res.data = _mm256_sqrt_pd(data);
200  return res;
201  }
202 
209  get_abs () const
210  {
211  // to compute the absolute value, perform
212  // bitwise andnot with -0. This will leave all
213  // value and exponent bits unchanged but force
214  // the sign value to +.
215  __m256d mask = _mm256_set1_pd (-0.);
216  VectorizedArray res;
217  res.data = _mm256_andnot_pd(mask, data);
218  return res;
219  }
220 
227  get_max (const VectorizedArray &other) const
228  {
229  VectorizedArray res;
230  res.data = _mm256_max_pd (data, other.data);
231  return res;
232  }
233 
240  get_min (const VectorizedArray &other) const
241  {
242  VectorizedArray res;
243  res.data = _mm256_min_pd (data, other.data);
244  return res;
245  }
246 
250  template <typename Number2> friend VectorizedArray<Number2>
251  std::sqrt (const VectorizedArray<Number2> &);
252  template <typename Number2> friend VectorizedArray<Number2>
253  std::abs (const VectorizedArray<Number2> &);
254  template <typename Number2> friend VectorizedArray<Number2>
255  std::max (const VectorizedArray<Number2> &, const VectorizedArray<Number2> &);
256  template <typename Number2> friend VectorizedArray<Number2>
257  std::min (const VectorizedArray<Number2> &, const VectorizedArray<Number2> &);
258 };
259 
260 
261 
265 template<>
266 class VectorizedArray<float>
267 {
268 public:
273  static const unsigned int n_array_elements = 8;
274 
280  operator = (const float x)
281  {
282  data = _mm256_set1_ps(x);
283  return *this;
284  }
285 
289  float &
290  operator [] (const unsigned int comp)
291  {
292  AssertIndexRange (comp, 8);
293  return *(reinterpret_cast<float *>(&data)+comp);
294  }
295 
299  const float &
300  operator [] (const unsigned int comp) const
301  {
302  AssertIndexRange (comp, 8);
303  return *(reinterpret_cast<const float *>(&data)+comp);
304  }
305 
310  operator += (const VectorizedArray &vec)
311  {
312 #ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
313  data += vec.data;
314 #else
315  data = _mm256_add_ps(data,vec.data);
316 #endif
317  return *this;
318  }
319 
324  operator -= (const VectorizedArray &vec)
325  {
326 #ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
327  data -= vec.data;
328 #else
329  data = _mm256_sub_ps(data,vec.data);
330 #endif
331  return *this;
332  }
337  operator *= (const VectorizedArray &vec)
338  {
339 #ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
340  data *= vec.data;
341 #else
342  data = _mm256_mul_ps(data,vec.data);
343 #endif
344  return *this;
345  }
346 
351  operator /= (const VectorizedArray &vec)
352  {
353 #ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
354  data /= vec.data;
355 #else
356  data = _mm256_div_ps(data,vec.data);
357 #endif
358  return *this;
359  }
360 
366  __m256 data;
367 
368 private:
369 
375  get_sqrt () const
376  {
377  VectorizedArray res;
378  res.data = _mm256_sqrt_ps(data);
379  return res;
380  }
387  get_abs () const
388  {
389  // to compute the absolute value, perform
390  // bitwise andnot with -0. This will leave all
391  // value and exponent bits unchanged but force
392  // the sign value to +.
393  __m256 mask = _mm256_set1_ps (-0.f);
394  VectorizedArray res;
395  res.data = _mm256_andnot_ps(mask, data);
396  return res;
397  }
398 
405  get_max (const VectorizedArray &other) const
406  {
407  VectorizedArray res;
408  res.data = _mm256_max_ps (data, other.data);
409  return res;
410  }
411 
418  get_min (const VectorizedArray &other) const
419  {
420  VectorizedArray res;
421  res.data = _mm256_min_ps (data, other.data);
422  return res;
423  }
424 
428  template <typename Number2> friend VectorizedArray<Number2>
429  std::sqrt (const VectorizedArray<Number2> &);
430  template <typename Number2> friend VectorizedArray<Number2>
431  std::abs (const VectorizedArray<Number2> &);
432  template <typename Number2> friend VectorizedArray<Number2>
433  std::max (const VectorizedArray<Number2> &, const VectorizedArray<Number2> &);
434  template <typename Number2> friend VectorizedArray<Number2>
435  std::min (const VectorizedArray<Number2> &, const VectorizedArray<Number2> &);
436 };
437 
438 
439 
440 // for safety, also check that __SSE2__ is defined in case the user manually
441 // set some conflicting compile flags which prevent compilation
442 
443 #elif DEAL_II_COMPILER_VECTORIZATION_LEVEL >= 1 && defined(__SSE2__)
444 
445 
446 
450 template <>
451 class VectorizedArray<double>
452 {
453 public:
458  static const unsigned int n_array_elements = 2;
459 
466  operator = (const double x)
467  {
468  data = _mm_set1_pd(x);
469  return *this;
470  }
471 
475  double &
476  operator [] (const unsigned int comp)
477  {
478  AssertIndexRange (comp, 2);
479  return *(reinterpret_cast<double *>(&data)+comp);
480  }
481 
485  const double &
486  operator [] (const unsigned int comp) const
487  {
488  AssertIndexRange (comp, 2);
489  return *(reinterpret_cast<const double *>(&data)+comp);
490  }
491 
496  operator += (const VectorizedArray &vec)
497  {
498 #ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
499  data += vec.data;
500 #else
501  data = _mm_add_pd(data,vec.data);
502 #endif
503  return *this;
504  }
505 
510  operator -= (const VectorizedArray &vec)
511  {
512 #ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
513  data -= vec.data;
514 #else
515  data = _mm_sub_pd(data,vec.data);
516 #endif
517  return *this;
518  }
523  operator *= (const VectorizedArray &vec)
524  {
525 #ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
526  data *= vec.data;
527 #else
528  data = _mm_mul_pd(data,vec.data);
529 #endif
530  return *this;
531  }
532 
537  operator /= (const VectorizedArray &vec)
538  {
539 #ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
540  data /= vec.data;
541 #else
542  data = _mm_div_pd(data,vec.data);
543 #endif
544  return *this;
545  }
546 
552  __m128d data;
553 
554 private:
560  get_sqrt () const
561  {
562  VectorizedArray res;
563  res.data = _mm_sqrt_pd(data);
564  return res;
565  }
566 
573  get_abs () const
574  {
575  // to compute the absolute value, perform
576  // bitwise andnot with -0. This will leave all
577  // value and exponent bits unchanged but force
578  // the sign value to +.
579  __m128d mask = _mm_set1_pd (-0.);
580  VectorizedArray res;
581  res.data = _mm_andnot_pd(mask, data);
582  return res;
583  }
584 
591  get_max (const VectorizedArray &other) const
592  {
593  VectorizedArray res;
594  res.data = _mm_max_pd (data, other.data);
595  return res;
596  }
597 
604  get_min (const VectorizedArray &other) const
605  {
606  VectorizedArray res;
607  res.data = _mm_min_pd (data, other.data);
608  return res;
609  }
610 
614  template <typename Number2> friend VectorizedArray<Number2>
615  std::sqrt (const VectorizedArray<Number2> &);
616  template <typename Number2> friend VectorizedArray<Number2>
617  std::abs (const VectorizedArray<Number2> &);
618  template <typename Number2> friend VectorizedArray<Number2>
619  std::max (const VectorizedArray<Number2> &, const VectorizedArray<Number2> &);
620  template <typename Number2> friend VectorizedArray<Number2>
621  std::min (const VectorizedArray<Number2> &, const VectorizedArray<Number2> &);
622 };
623 
624 
625 
629 template <>
630 class VectorizedArray<float>
631 {
632 public:
637  static const unsigned int n_array_elements = 4;
638 
645  operator = (const float x)
646  {
647  data = _mm_set1_ps(x);
648  return *this;
649  }
650 
654  float &
655  operator [] (const unsigned int comp)
656  {
657  AssertIndexRange (comp, 4);
658  return *(reinterpret_cast<float *>(&data)+comp);
659  }
660 
664  const float &
665  operator [] (const unsigned int comp) const
666  {
667  AssertIndexRange (comp, 4);
668  return *(reinterpret_cast<const float *>(&data)+comp);
669  }
670 
675  operator += (const VectorizedArray &vec)
676  {
677 #ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
678  data += vec.data;
679 #else
680  data = _mm_add_ps(data,vec.data);
681 #endif
682  return *this;
683  }
684 
689  operator -= (const VectorizedArray &vec)
690  {
691 #ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
692  data -= vec.data;
693 #else
694  data = _mm_sub_ps(data,vec.data);
695 #endif
696  return *this;
697  }
702  operator *= (const VectorizedArray &vec)
703  {
704 #ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
705  data *= vec.data;
706 #else
707  data = _mm_mul_ps(data,vec.data);
708 #endif
709  return *this;
710  }
711 
716  operator /= (const VectorizedArray &vec)
717  {
718 #ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
719  data /= vec.data;
720 #else
721  data = _mm_div_ps(data,vec.data);
722 #endif
723  return *this;
724  }
725 
731  __m128 data;
732 
733 private:
739  get_sqrt () const
740  {
741  VectorizedArray res;
742  res.data = _mm_sqrt_ps(data);
743  return res;
744  }
745 
752  get_abs () const
753  {
754  // to compute the absolute value, perform
755  // bitwise andnot with -0. This will leave all
756  // value and exponent bits unchanged but force
757  // the sign value to +.
758  __m128 mask = _mm_set1_ps (-0.f);
759  VectorizedArray res;
760  res.data = _mm_andnot_ps(mask, data);
761  return res;
762  }
763 
770  get_max (const VectorizedArray &other) const
771  {
772  VectorizedArray res;
773  res.data = _mm_max_ps (data, other.data);
774  return res;
775  }
776 
783  get_min (const VectorizedArray &other) const
784  {
785  VectorizedArray res;
786  res.data = _mm_min_ps (data, other.data);
787  return res;
788  }
789 
793  template <typename Number2> friend VectorizedArray<Number2>
794  std::sqrt (const VectorizedArray<Number2> &);
795  template <typename Number2> friend VectorizedArray<Number2>
796  std::abs (const VectorizedArray<Number2> &);
797  template <typename Number2> friend VectorizedArray<Number2>
798  std::max (const VectorizedArray<Number2> &, const VectorizedArray<Number2> &);
799  template <typename Number2> friend VectorizedArray<Number2>
800  std::min (const VectorizedArray<Number2> &, const VectorizedArray<Number2> &);
801 };
802 
803 
804 #endif // if DEAL_II_COMPILER_VECTORIZATION_LEVEL > 0
805 
806 
856 template <typename Number>
858 {
859 public:
864  static const unsigned int n_array_elements = 1;
865 
866  // POD means that there should be no
867  // user-defined constructors, destructors and
868  // copy functions (the standard is somewhat
869  // relaxed in C++2011, though).
870 
877  operator = (const Number scalar)
878  {
879  data = scalar;
880  return *this;
881  }
882 
887  Number &
888  operator [] (const unsigned int comp)
889  {
890  AssertIndexRange (comp, 1);
891  return data;
892  }
893 
898  const Number &
899  operator [] (const unsigned int comp) const
900  {
901  AssertIndexRange (comp, 1);
902  return data;
903  }
904 
910  {
911  data+=vec.data;
912  return *this;
913  }
914 
920  {
921  data-=vec.data;
922  return *this;
923  }
924 
930  {
931  data*=vec.data;
932  return *this;
933  }
934 
940  {
941  data/=vec.data;
942  return *this;
943  }
944 
950  Number data;
951 
952 private:
958  get_sqrt () const
959  {
960  VectorizedArray res;
961  res.data = std::sqrt(data);
962  return res;
963  }
964 
971  get_abs () const
972  {
973  VectorizedArray res;
974  res.data = std::fabs(data);
975  return res;
976  }
977 
984  get_max (const VectorizedArray &other) const
985  {
986  VectorizedArray res;
987  res.data = std::max (data, other.data);
988  return res;
989  }
990 
997  get_min (const VectorizedArray &other) const
998  {
999  VectorizedArray res;
1000  res.data = std::min (data, other.data);
1001  return res;
1002  }
1003 
1007  template <typename Number2> friend VectorizedArray<Number2>
1008  std::sqrt (const VectorizedArray<Number2> &);
1009  template <typename Number2> friend VectorizedArray<Number2>
1010  std::abs (const VectorizedArray<Number2> &);
1011  template <typename Number2> friend VectorizedArray<Number2>
1012  std::max (const VectorizedArray<Number2> &, const VectorizedArray<Number2> &);
1013  template <typename Number2> friend VectorizedArray<Number2>
1014  std::min (const VectorizedArray<Number2> &, const VectorizedArray<Number2> &);
1015 };
1016 
1023 template <typename Number>
1024 inline
1026 make_vectorized_array (const Number &u)
1027 {
1028  VectorizedArray<Number> result;
1029  result = u;
1030  return result;
1031 }
1032 
1038 template <typename Number>
1039 inline
1042  const VectorizedArray<Number> &v)
1043 {
1044  VectorizedArray<Number> tmp = u;
1045  return tmp+=v;
1046 }
1047 
1053 template <typename Number>
1054 inline
1057  const VectorizedArray<Number> &v)
1058 {
1059  VectorizedArray<Number> tmp = u;
1060  return tmp-=v;
1061 }
1062 
1068 template <typename Number>
1069 inline
1072  const VectorizedArray<Number> &v)
1073 {
1074  VectorizedArray<Number> tmp = u;
1075  return tmp*=v;
1076 }
1077 
1083 template <typename Number>
1084 inline
1087  const VectorizedArray<Number> &v)
1088 {
1089  VectorizedArray<Number> tmp = u;
1090  return tmp/=v;
1091 }
1092 
1099 template <typename Number>
1100 inline
1102 operator + (const Number &u,
1103  const VectorizedArray<Number> &v)
1104 {
1106  tmp = u;
1107  return tmp+=v;
1108 }
1109 
1118 inline
1120 operator + (const double &u,
1121  const VectorizedArray<float> &v)
1122 {
1124  tmp = u;
1125  return tmp+=v;
1126 }
1127 
1134 template <typename Number>
1135 inline
1138  const Number &u)
1139 {
1140  return u + v;
1141 }
1142 
1151 inline
1154  const double &u)
1155 {
1156  return u + v;
1157 }
1158 
1165 template <typename Number>
1166 inline
1168 operator - (const Number &u,
1169  const VectorizedArray<Number> &v)
1170 {
1172  tmp = u;
1173  return tmp-=v;
1174 }
1175 
1184 inline
1186 operator - (const double &u,
1187  const VectorizedArray<float> &v)
1188 {
1190  tmp = float(u);
1191  return tmp-=v;
1192 }
1193 
1200 template <typename Number>
1201 inline
1204  const Number &u)
1205 {
1207  tmp = u;
1208  return v-tmp;
1209 }
1210 
1219 inline
1222  const double &u)
1223 {
1225  tmp = float(u);
1226  return v-tmp;
1227 }
1228 
1235 template <typename Number>
1236 inline
1238 operator * (const Number &u,
1239  const VectorizedArray<Number> &v)
1240 {
1242  tmp = u;
1243  return tmp*=v;
1244 }
1245 
1254 inline
1256 operator * (const double &u,
1257  const VectorizedArray<float> &v)
1258 {
1260  tmp = float(u);
1261  return tmp*=v;
1262 }
1263 
1270 template <typename Number>
1271 inline
1274  const Number &u)
1275 {
1276  return u * v;
1277 }
1278 
1287 inline
1290  const double &u)
1291 {
1292  return u * v;
1293 }
1294 
1301 template <typename Number>
1302 inline
1304 operator / (const Number &u,
1305  const VectorizedArray<Number> &v)
1306 {
1308  tmp = u;
1309  return tmp/=v;
1310 }
1311 
1320 inline
1322 operator / (const double &u,
1323  const VectorizedArray<float> &v)
1324 {
1326  tmp = float(u);
1327  return tmp/=v;
1328 }
1329 
1336 template <typename Number>
1337 inline
1340  const Number &u)
1341 {
1343  tmp = u;
1344  return v/tmp;
1345 }
1346 
1355 inline
1358  const double &u)
1359 {
1361  tmp = float(u);
1362  return v/tmp;
1363 }
1364 
1370 template <typename Number>
1371 inline
1374 {
1375  return u;
1376 }
1377 
1383 template <typename Number>
1384 inline
1387 {
1388  // to get a negative sign, subtract the input from zero (could also
1389  // multiply by -1, but this one is slightly simpler)
1390  return VectorizedArray<Number>()-u;
1391 }
1392 
1393 
1394 
1400 namespace internal
1401 {
1417  template <typename T>
1419  {
1420  static const std::size_t minimum_parallel_grain_size = 160000/sizeof(T)+1;
1421  public:
1431  AlignedVectorMove (T *source_begin,
1432  T *source_end,
1433  T *destination,
1434  bool copy_only = false)
1435  :
1436  source_ (source_begin),
1437  destination_ (destination),
1438  copy_only_ (copy_only)
1439  {
1440  Assert (source_end >= source_begin, ExcInternalError());
1441  const std::size_t size = source_end - source_begin;
1442  if (size < minimum_parallel_grain_size)
1443  apply_to_subrange (0, size);
1444  else
1445  apply_parallel (0, size, minimum_parallel_grain_size);
1446  }
1447 
1453  virtual void apply_to_subrange (const std::size_t begin,
1454  const std::size_t end) const
1455  {
1456  // for classes trivial assignment can use
1457  // memcpy
1458  if (std_cxx1x::is_trivial<T>::value == true)
1459  std::memcpy (destination_+begin, source_+begin, (end-begin)*sizeof(T));
1460  else if (copy_only_ == false)
1461  for (std::size_t i=begin; i<end; ++i)
1462  {
1463  // initialize memory, copy, and destruct
1464  new (&destination_[i]) T;
1465  destination_[i] = source_[i];
1466  source_[i].~T();
1467  }
1468  else
1469  for (std::size_t i=begin; i<end; ++i)
1470  {
1471  new (&destination_[i]) T;
1472  destination_[i] = source_[i];
1473  }
1474  }
1475 
1476  private:
1477  T *source_;
1478  T *destination_;
1479  const bool copy_only_;
1480  };
1481 
1487  template <typename T>
1489  {
1490  static const std::size_t minimum_parallel_grain_size = 160000/sizeof(T)+1;
1491  public:
1497  AlignedVectorSet (const std::size_t size,
1498  const T &element,
1499  T *destination)
1500  :
1501  element_ (element),
1502  destination_ (destination),
1503  trivial_element (false)
1504  {
1505  if (size == 0)
1506  return;
1507 
1508  if (std_cxx1x::is_trivial<T>::value == true)
1509  {
1510  const unsigned char zero [sizeof(T)] = {};
1511  if (std::memcmp(zero, &element, sizeof(T)) == 0)
1512  trivial_element = true;
1513  }
1514  if (size < minimum_parallel_grain_size)
1515  apply_to_subrange (0, size);
1516  else
1517  apply_parallel (0, size, minimum_parallel_grain_size);
1518  }
1519 
1520  private:
1521 
1526  virtual void apply_to_subrange (const std::size_t begin,
1527  const std::size_t end) const
1528  {
1529  // for classes with trivial assignment of zero
1530  // can use memset
1531  if (std_cxx1x::is_trivial<T>::value == true && trivial_element)
1532  std::memset (destination_+begin, 0, (end-begin)*sizeof(T));
1533  else
1534  for (std::size_t i=begin; i<end; ++i)
1535  {
1536  // initialize memory and set
1537  new (&destination_[i]) T;
1538  destination_[i] = element_;
1539  }
1540  }
1541 
1542  const T &element_;
1543  mutable T *destination_;
1544  bool trivial_element;
1545  };
1546 } // end of namespace internal
1547 
1548 
1562 template < class T >
1564 {
1565 public:
1572  typedef T value_type;
1573  typedef value_type *pointer;
1574  typedef const value_type *const_pointer;
1575  typedef value_type *iterator;
1576  typedef const value_type *const_iterator;
1577  typedef value_type &reference;
1578  typedef const value_type &const_reference;
1579  typedef std::size_t size_type;
1580 
1586  :
1587  _data (0),
1588  _end_data (0),
1589  _end_allocated (0)
1590  {};
1591 
1596  AlignedVector (const size_type size)
1597  :
1598  _data (0),
1599  _end_data (0),
1600  _end_allocated (0)
1601  {
1602  if (size > 0)
1603  resize (size);
1604  }
1605 
1610  {
1611  clear();
1612  }
1613 
1618  :
1619  _data (0),
1620  _end_data (0),
1621  _end_allocated (0)
1622  {
1623  // do not invalidate old data
1624  resize_fast (vec._end_data - vec._data);
1626  }
1627 
1631  AlignedVector &
1633  {
1634  clear();
1635  resize_fast (vec._end_data - vec._data);
1637  return *this;
1638  }
1639 
1646  void resize_fast (const size_type size)
1647  {
1648  reserve (size);
1649  _end_data = _data + size;
1650  }
1651 
1660  void resize (const size_type size_in,
1661  const T &init = T())
1662  {
1663  const size_type old_size = size();
1664  if (std_cxx1x::is_trivial<T>::value == false && size_in < old_size)
1665  {
1666  // call destructor on fields that are released
1667  while (_end_data != _data+size_in)
1668  (--_end_data)->~T();
1669  }
1670 
1671  resize_fast (size_in);
1672  // now _size is set correctly, need to set the
1673  // values
1674  if (size_in > old_size)
1675  internal::AlignedVectorSet<T> (size_in-old_size, init,
1676  _data+old_size);
1677  }
1678 
1691  void reserve (const size_type size_alloc)
1692  {
1693  const size_type old_size = _end_data - _data;
1694  const size_type allocated_size = _end_allocated - _data;
1695  if (size_alloc > allocated_size)
1696  {
1697  // if we continuously increase the size of the
1698  // vector, we might be reallocating a lot of
1699  // times. therefore, try to increase the size
1700  // more aggressively
1701  size_type new_size = size_alloc;
1702  if (size_alloc < (2 * allocated_size))
1703  new_size = 2 * allocated_size;
1704 
1705  const size_type size_actual_allocate = new_size * sizeof(T);
1706 
1707 #if DEAL_II_COMPILER_VECTORIZATION_LEVEL > 0
1708 
1709  // allocate and align along boundaries of the
1710  // size of VectorizedArray<double>, which is
1711  // 16 bytes for SSE and 32 bytes for AVX
1712  T *new_data = static_cast<T *>(_mm_malloc (size_actual_allocate,
1713  sizeof(VectorizedArray<double>)));
1714 #else
1715  T *new_data = static_cast<T *>(malloc (size_actual_allocate));
1716 #endif
1717  if (new_data == 0)
1718  throw std::bad_alloc();
1719 
1720  // copy data in case there was some content
1721  // before and release the old memory with the
1722  // function corresponding to the one used for
1723  // allocating
1724  std::swap (_data, new_data);
1725  _end_data = _data + old_size;
1726  _end_allocated = _data + new_size;
1727  if (_end_data != _data)
1728  {
1729  internal::AlignedVectorMove<T>(new_data, new_data + old_size,
1730  _data);
1731 #if DEAL_II_COMPILER_VECTORIZATION_LEVEL > 0
1732  _mm_free(new_data);
1733 #else
1734  free(new_data);
1735 #endif
1736  }
1737  }
1738  else if (size_alloc == 0)
1739  clear();
1740  }
1741 
1748  void clear ()
1749  {
1750  if (_data != 0)
1751  {
1752  if (std_cxx1x::is_trivial<T>::value == false)
1753  while (_end_data != _data)
1754  (--_end_data)->~T();
1755 
1756 #if DEAL_II_COMPILER_VECTORIZATION_LEVEL > 0
1757  _mm_free(_data);
1758 #else
1759  free(_data);
1760 #endif
1761  }
1762  _data = 0;
1763  _end_data = 0;
1764  _end_allocated = 0;
1765  };
1766 
1774  void push_back (const T in_data)
1775  {
1777  if (_end_data == _end_allocated)
1778  reserve (std::max(2*capacity(),static_cast<size_type>(16)));
1779  if (std_cxx1x::is_trivial<T>::value == false)
1780  new (_end_data) T;
1781  *_end_data++ = in_data;
1782  }
1783 
1788  reference back ()
1789  {
1790  AssertIndexRange (0, size());
1791  T *field = _end_data - 1;
1792  return *field;
1793  }
1794 
1799  const_reference back () const
1800  {
1801  AssertIndexRange (0, size());
1802  const T *field = _end_data - 1;
1803  return *field;
1804  }
1805 
1810  template <typename ForwardIterator>
1811  void insert_back (ForwardIterator begin,
1812  ForwardIterator end)
1813  {
1814  const unsigned int old_size = size();
1815  reserve (old_size + (end-begin));
1816  for ( ; begin != end; ++begin, ++_end_data)
1817  {
1818  if (std_cxx1x::is_trivial<T>::value == false)
1819  new (_end_data) T;
1820  *_end_data = *begin;
1821  }
1822  }
1823 
1829  {
1830  std::swap (_data, vec._data);
1831  std::swap (_end_data, vec._end_data);
1832  std::swap (_end_allocated, vec._end_allocated);
1833  }
1834 
1838  size_type size () const
1839  {
1840  return _end_data - _data;
1841  }
1842 
1849  size_type capacity () const
1850  {
1851  return _end_allocated - _data;
1852  }
1853 
1858  reference
1859  operator [] (const size_type index)
1860  {
1861  AssertIndexRange (index, size());
1862  return _data[index];
1863  };
1864 
1869  const_reference operator [] (const size_type index) const
1870  {
1871  AssertIndexRange (index, size());
1872  return _data[index];
1873  };
1874 
1879  iterator begin ()
1880  {
1881  return _data;
1882  }
1883 
1888  iterator end ()
1889  {
1890  return _end_data;
1891  }
1892 
1897  const_iterator begin () const
1898  {
1899  return _data;
1900  }
1901 
1906  const_iterator end () const
1907  {
1908  return _end_data;
1909  }
1910 
1917  size_type memory_consumption () const
1918  {
1919  size_type memory = sizeof(this);
1920  memory += sizeof(T) * capacity();
1921  return memory;
1922  }
1923 
1924 private:
1925 
1929  T *_data;
1930 
1935 
1940 };
1941 
1942 
1943 DEAL_II_NAMESPACE_CLOSE
1944 
1945 
1952 namespace std
1953 {
1961  template <typename Number>
1962  inline
1963  ::VectorizedArray<Number>
1964  sin (const ::VectorizedArray<Number> &x)
1965  {
1966  ::VectorizedArray<Number> sin_val;
1967  for (unsigned int i=0; i<::VectorizedArray<Number>::n_array_elements; ++i)
1968  sin_val[i] = std::sin(x[i]);
1969  return sin_val;
1970  }
1971 
1972 
1973 
1981  template <typename Number>
1982  inline
1983  ::VectorizedArray<Number>
1984  tan (const ::VectorizedArray<Number> &x)
1985  {
1986  ::VectorizedArray<Number> tan_val;
1987  for (unsigned int i=0; i<::VectorizedArray<Number>::n_array_elements; ++i)
1988  tan_val[i] = std::tan(x[i]);
1989  return tan_val;
1990  }
1991 
1992 
2000  template <typename Number>
2001  inline
2002  ::VectorizedArray<Number>
2003  cos (const ::VectorizedArray<Number> &x)
2004  {
2005  ::VectorizedArray<Number> cos_val;
2006  for (unsigned int i=0; i<::VectorizedArray<Number>::n_array_elements; ++i)
2007  cos_val[i] = std::cos(x[i]);
2008  return cos_val;
2009  }
2010 
2011 
2019  template <typename Number>
2020  inline
2021  ::VectorizedArray<Number>
2022  exp (const ::VectorizedArray<Number> &x)
2023  {
2024  ::VectorizedArray<Number> exp_val;
2025  for (unsigned int i=0; i<::VectorizedArray<Number>::n_array_elements; ++i)
2026  exp_val[i] = std::exp(x[i]);
2027  return exp_val;
2028  }
2029 
2030 
2038  template <typename Number>
2039  inline
2040  ::VectorizedArray<Number>
2041  log (const ::VectorizedArray<Number> &x)
2042  {
2043  ::VectorizedArray<Number> log_val;
2044  for (unsigned int i=0; i<::VectorizedArray<Number>::n_array_elements; ++i)
2045  log_val[i] = std::log(x[i]);
2046  return log_val;
2047  }
2048 
2049 
2050 
2058  template <typename Number>
2059  inline
2060  ::VectorizedArray<Number>
2061  sqrt (const ::VectorizedArray<Number> &x)
2062  {
2063  return x.get_sqrt();
2064  }
2065 
2066 
2067 
2075  template <typename Number>
2076  inline
2077  ::VectorizedArray<Number>
2078  abs (const ::VectorizedArray<Number> &x)
2079  {
2080  return x.get_abs();
2081  }
2082 
2083 
2084 
2092  template <typename Number>
2093  inline
2094  ::VectorizedArray<Number>
2095  max (const ::VectorizedArray<Number> &x,
2096  const ::VectorizedArray<Number> &y)
2097  {
2098  return x.get_max(y);
2099  }
2100 
2101 
2102 
2110  template <typename Number>
2111  inline
2112  ::VectorizedArray<Number>
2113  min (const ::VectorizedArray<Number> &x,
2114  const ::VectorizedArray<Number> &y)
2115  {
2116  return x.get_min(y);
2117  }
2118 
2119 }
2120 
2121 #endif
VectorizedArray< Number > operator/(const VectorizedArray< Number > &u, const VectorizedArray< Number > &v)
VectorizedArray< Number > operator+(const VectorizedArray< Number > &u, const VectorizedArray< Number > &v)
void apply_parallel(const std::size_t begin, const std::size_t end, const std::size_t minimum_parallel_grain_size) const
Definition: parallel.h:855
VectorizedArray< Number > operator-(const VectorizedArray< Number > &u, const VectorizedArray< Number > &v)
VectorizedArray< Number > log(const ::VectorizedArray< Number > &x)
VectorizedArray< Number > operator*(const VectorizedArray< Number > &u, const VectorizedArray< Number > &v)
virtual void apply_to_subrange(const std::size_t begin, const std::size_t end) const
size_type capacity() const
VectorizedArray< Number > make_vectorized_array(const Number &u)
VectorizedArray< Number > tan(const ::VectorizedArray< Number > &x)
#define AssertIndexRange(index, range)
Definition: exceptions.h:888
size_type memory_consumption() const
AlignedVector & operator=(const AlignedVector< T > &vec)
STL namespace.
void reserve(const size_type size_alloc)
VectorizedArray< Number > exp(const ::VectorizedArray< Number > &x)
VectorizedArray get_sqrt() const
void push_back(const T in_data)
virtual void apply_to_subrange(const std::size_t begin, const std::size_t end) const
AlignedVector(const size_type size)
reference operator[](const size_type index)
size_type size() const
VectorizedArray & operator+=(const VectorizedArray< Number > &vec)
VectorizedArray get_min(const VectorizedArray &other) const
Number & operator[](const unsigned int comp)
VectorizedArray get_abs() const
static const unsigned int n_array_elements
const_reference back() const
VectorizedArray< Number > min(const ::VectorizedArray< Number > &x, const ::VectorizedArray< Number > &y)
#define Assert(cond, exc)
Definition: exceptions.h:299
void resize(const size_type size_in, const T &init=T())
void insert_back(ForwardIterator begin, ForwardIterator end)
VectorizedArray & operator-=(const VectorizedArray< Number > &vec)
void swap(AlignedVector< T > &vec)
VectorizedArray< Number > sqrt(const ::VectorizedArray< Number > &x)
VectorizedArray< Number > sin(const ::VectorizedArray< Number > &x)
void resize_fast(const size_type size)
VectorizedArray get_max(const VectorizedArray &other) const
VectorizedArray & operator/=(const VectorizedArray< Number > &vec)
iterator end()
iterator begin()
const_iterator end() const
AlignedVectorMove(T *source_begin, T *source_end, T *destination, bool copy_only=false)
const_iterator begin() const
VectorizedArray & operator*=(const VectorizedArray< Number > &vec)
AlignedVector(const AlignedVector< T > &vec)
VectorizedArray & operator=(const Number scalar)
::ExceptionBase & ExcInternalError()
VectorizedArray< Number > abs(const ::VectorizedArray< Number > &x)
AlignedVectorSet(const std::size_t size, const T &element, T *destination)
VectorizedArray< Number > max(const ::VectorizedArray< Number > &x, const ::VectorizedArray< Number > &y)
VectorizedArray< Number > cos(const ::VectorizedArray< Number > &x)
reference back()