Tpetra parallel linear algebra  Version of the Day
Tpetra_Details_FixedHashTable_def.hpp
1 /*
2 // @HEADER
3 // ***********************************************************************
4 //
5 // Tpetra: Templated Linear Algebra Services Package
6 // Copyright (2008) Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39 //
40 // ************************************************************************
41 // @HEADER
42 */
43 
44 #ifndef TPETRA_DETAILS_FIXEDHASHTABLE_DEF_HPP
45 #define TPETRA_DETAILS_FIXEDHASHTABLE_DEF_HPP
46 
47 #include "Tpetra_Details_FixedHashTable_decl.hpp"
48 #ifdef TPETRA_USE_MURMUR_HASH
49 # include <Kokkos_Functional.hpp> // hash function used by Kokkos::UnorderedMap
50 #endif // TPETRA_USE_MURMUR_HASH
51 #include <limits> // std::numeric_limits
52 #include <type_traits>
53 
54 namespace Tpetra {
55 namespace Details {
56 //
57 // This namespace stores utility functions and Kokkos
58 // functors for use in FixedHashTable construction.
59 //
60 namespace FHT {
61 
62 // Is it worth actually using building the FixedHashTable using
63 // parallel threads, instead of just counting in a sequential loop?
64 //
65 // The parallel version of FixedHashTable construction isn't just a
66 // parallelization of the sequential loops. It incurs additional
67 // overheads. For example, the CountBuckets kernel uses atomic update
68 // instructions to count the number of "buckets" per offsets array
69 // (ptr) entry. Atomic updates have overhead, even if only one thread
70 // issues them. The Kokkos kernels are still correct in that case,
71 // but I would rather not incur overhead then. It might make sense to
72 // set the minimum number of threads to something greater than 1, but
73 // we would need experiments to find out.
74 //
75 // FixedHashTable code should call the nonmember function below, that
76 // has the same name but starts with a lower-case w.
77 template<class ExecSpace>
78 struct WorthBuildingFixedHashTableInParallel {
79  typedef typename ExecSpace::execution_space execution_space;
80 
81  static bool isWorth () {
82  // NOTE: Kokkos::Cuda does NOT have this method. That's why we
83  // need the partial specialization below.
84  return execution_space::max_hardware_threads () > 1;
85  }
86 };
87 
88 #ifdef KOKKOS_HAVE_CUDA
89 template<>
90 struct WorthBuildingFixedHashTableInParallel<Kokkos::Cuda> {
91  // There could be more complicated expressions for whether this is
92  // actually worthwhile, but for now I'll just say that with Cuda, we
93  // will ALWAYS count buckets in parallel (that is, run a Kokkos
94  // parallel kernel).
95  static bool isWorth () {
96  return true;
97  }
98 };
99 #endif // KOKKOS_HAVE_CUDA
100 
101 // Is it worth actually using building the FixedHashTable using
102 // parallel threads, instead of just counting in a sequential loop?
103 //
104 // The parallel version of FixedHashTable construction isn't just a
105 // parallelization of the sequential loops. It incurs additional
106 // overheads. For example, the CountBuckets kernel uses atomic update
107 // instructions to count the number of "buckets" per offsets array
108 // (ptr) entry. Atomic updates have overhead, even if only one thread
109 // issues them. The Kokkos kernels are still correct in that case,
110 // but I would rather not incur overhead then. It might make sense to
111 // set the minimum number of threads to something greater than 1, but
112 // we would need experiments to find out.
113 template<class ExecSpace>
114 bool worthBuildingFixedHashTableInParallel () {
115  return WorthBuildingFixedHashTableInParallel<ExecSpace>::isWorth ();
116 }
117 
118 // If the input kokkos::View<const KeyType*, ArrayLayout,
119 // InputExecSpace, Kokkos::MemoryUnamanged> is NOT accessible from the
120 // OutputExecSpace execution space, make and return a deep copy.
121 // Otherwise, just return the original input.
122 //
123 // The point of this is to avoid unnecessary copies, when the input
124 // array of keys comes in as a Teuchos::ArrayView (which we wrap in an
125 // unmanaged Kokkos::View).
126 template<class KeyType,
127  class ArrayLayout,
128  class InputExecSpace,
129  class OutputExecSpace,
130  const bool mustDeepCopy =
131  ! std::is_same<typename InputExecSpace::memory_space,
132  typename OutputExecSpace::memory_space>::value>
133 struct DeepCopyIfNeeded {
134  // The default implementation is trivial; all the work happens in
135  // partial specializations.
136 };
137 
138 // Specialization for when a deep copy is actually needed.
139 template<class KeyType,
140  class ArrayLayout,
141  class InputExecSpace,
142  class OutputExecSpace>
143 struct DeepCopyIfNeeded<KeyType, ArrayLayout, InputExecSpace, OutputExecSpace, true>
144 {
145  typedef Kokkos::View<const KeyType*, ArrayLayout,
146  InputExecSpace, Kokkos::MemoryUnmanaged> input_view_type;
147  // In this case, a deep copy IS needed. As a result, the output
148  // type is a managed Kokkos::View, which differs from the input
149  // type. Clients must get the correct return type from this struct,
150  // either from the typedef below or from 'auto'. Assigning an
151  // unmanaged View to a managed View is a syntax error.
152  typedef Kokkos::View<const KeyType*, ArrayLayout, OutputExecSpace> output_view_type;
153 
154  static output_view_type copy (const input_view_type& src) {
155  typedef typename output_view_type::non_const_type NC;
156 
157  NC dst (Kokkos::ViewAllocateWithoutInitializing (src.tracker ().label ()),
158  src.dimension_0 ());
159  Kokkos::deep_copy (dst, src);
160  return output_view_type (dst);
161  }
162 };
163 
164 // Specialization if no need to make a deep copy.
165 template<class KeyType,
166  class ArrayLayout,
167  class InputExecSpace,
168  class OutputExecSpace>
169 struct DeepCopyIfNeeded<KeyType, ArrayLayout, InputExecSpace, OutputExecSpace, false> {
170  typedef Kokkos::View<const KeyType*, ArrayLayout,
171  InputExecSpace, Kokkos::MemoryUnmanaged> input_view_type;
172  typedef Kokkos::View<const KeyType*, ArrayLayout, OutputExecSpace,
173  Kokkos::MemoryUnmanaged> output_view_type;
174 
175  static output_view_type copy (const input_view_type& src) {
176  return output_view_type (src);
177  }
178 };
179 
180 //
181 // Functors for FixedHashTable initialization
182 //
183 // NOTE (mfh 23 May 2015): Once we can use lambdas with CUDA, we
184 // should consider replacing all of these functors with in-line
185 // lambdas. The only issue is that we would need to bake the
186 // execution space into the policy, since the default execution space
187 // might differ from the one Tpetra wants to use.
188 
198 template<class CountsViewType,
199  class KeysViewType,
200  class SizeType = typename KeysViewType::size_type>
202 public:
203  typedef CountsViewType counts_view_type;
204  typedef KeysViewType keys_view_type;
205  typedef typename CountsViewType::execution_space execution_space;
206  typedef typename CountsViewType::memory_space memory_space;
207  typedef SizeType size_type;
208  typedef typename keys_view_type::non_const_value_type key_type;
209  // mfh 21 May 2015: Having a device_type typedef in the functor
210  // along with an execution_space typedef causes compilation issues.
211  // This is because one of Kokkos' partial specializations picks up
212  // on the device_type typedef, and another picks up on the
213  // execution_space typedef. The former is a legacy of a previous
214  // design iteration of Kokkos, which did not separate memory and
215  // execution spaces.
217 
223  CountBuckets (const counts_view_type& counts,
224  const keys_view_type& keys,
225  const size_type size) :
226  counts_ (counts),
227  keys_ (keys),
228  size_ (size)
229  {}
230 
235  KOKKOS_INLINE_FUNCTION void
236  operator () (const size_type& i) const
237  {
238  typedef typename hash_type::result_type hash_value_type;
239 
240  const hash_value_type hashVal = hash_type::hashFunc (keys_[i], size_);
241  Kokkos::atomic_fetch_add (&counts_[hashVal], 1);
242  }
243 
244 private:
246  counts_view_type counts_;
248  keys_view_type keys_;
250  size_type size_;
251 };
252 
264 template<class OffsetsViewType,
265  class SizeType = typename OffsetsViewType::size_type>
267 public:
268  typedef OffsetsViewType offsets_view_type;
269  typedef typename OffsetsViewType::const_type counts_view_type;
270  typedef typename OffsetsViewType::execution_space execution_space;
271  typedef typename OffsetsViewType::memory_space memory_space;
272  typedef SizeType size_type;
273  typedef typename OffsetsViewType::non_const_value_type value_type;
274 
280  ComputeRowOffsets (const offsets_view_type& offsets,
281  const counts_view_type& counts) :
282  offsets_ (offsets),
283  counts_ (counts),
284  size_ (counts.dimension_0 ())
285  {}
286 
288  KOKKOS_INLINE_FUNCTION void init (value_type& dst) const
289  {
290  dst = 0;
291  }
292 
293  KOKKOS_INLINE_FUNCTION void
294  join (volatile value_type& dst,
295  const volatile value_type& src) const
296  {
297  dst += src;
298  }
299 
300  KOKKOS_INLINE_FUNCTION void
301  operator () (const size_type& i, value_type& update, const bool final) const
302  {
303  if (final) {
304  offsets_[i] = update;
305  }
306  if (i < size_) {
307  update += counts_[i];
308  }
309  }
310 
311 private:
313  offsets_view_type offsets_;
315  counts_view_type counts_;
317  size_type size_;
318 };
319 
330 template<class KeyType>
332  FillPairsResult () :
333  minKey_ (std::numeric_limits<KeyType>::max ()),
334  // min() for a floating-point type returns the minimum _positive_
335  // normalized value. This is different than for integer types.
336  // lowest() is new in C++11 and returns the least value, always
337  // negative for signed finite types.
338  //
339  // mfh 23 May 2015: I have heard reports that
340  // std::numeric_limits<int>::lowest() does not exist with the
341  // Intel compiler. I'm not sure if the users in question actually
342  // enabled C++11. However, it's easy enough to work around this
343  // issue. The standard floating-point types are signed and have a
344  // sign bit, so lowest() is just -max(). For integer types, we
345  // can use min() instead.
346  maxKey_ (std::numeric_limits<KeyType>::is_integer ?
347  std::numeric_limits<KeyType>::min () :
348  -std::numeric_limits<KeyType>::max ()),
349  success_ (true)
350  {
351  static_assert (std::is_arithmetic<KeyType>::value, "FillPairsResult: "
352  "KeyType must be some kind of number type.");
353  }
354 
355  FillPairsResult (const KeyType& initMinKey,
356  const KeyType& initMaxKey) :
357  minKey_ (initMinKey),
358  maxKey_ (initMaxKey),
359  success_ (true)
360  {
361  static_assert (std::is_arithmetic<KeyType>::value, "FillPairsResult: "
362  "KeyType must be some kind of number type.");
363  }
364 
365  KeyType minKey_;
366  KeyType maxKey_;
367  bool success_;
368 };
369 
399 template<class PairsViewType,
400  class KeysViewType,
401  class CountsViewType,
402  class SizeType = typename KeysViewType::size_type>
403 class FillPairs {
404 public:
405  typedef typename CountsViewType::non_const_type counts_view_type;
406  typedef typename counts_view_type::const_type offsets_view_type;
407 
408  typedef PairsViewType pairs_view_type;
409  typedef typename KeysViewType::const_type keys_view_type;
410  typedef typename offsets_view_type::execution_space execution_space;
411  typedef typename offsets_view_type::memory_space memory_space;
412  typedef SizeType size_type;
413 
414  typedef typename keys_view_type::non_const_value_type key_type;
415  typedef typename pairs_view_type::non_const_value_type pair_type;
416 
418 
419  // mfh 23 May 2015: Having a device_type typedef in the functor
420  // along with an execution_space typedef causes compilation issues.
421  // This is because one of Kokkos' partial specializations picks up
422  // on the device_type typedef, and another picks up on the
423  // execution_space typedef. The former is a legacy of a previous
424  // design iteration of Kokkos, which did not separate memory and
425  // execution spaces.
427 
438  FillPairs (const pairs_view_type& pairs,
439  const counts_view_type& counts,
440  const offsets_view_type& ptr,
441  const keys_view_type& keys,
442  const typename pair_type::second_type startingValue) :
443  pairs_ (pairs),
444  counts_ (counts),
445  ptr_ (ptr),
446  keys_ (keys),
447  size_ (counts.dimension_0 ()),
448  startingValue_ (startingValue),
449  initMinKey_ (std::numeric_limits<key_type>::max ()),
450  initMaxKey_ (std::numeric_limits<key_type>::is_integer ?
451  std::numeric_limits<key_type>::min () :
452  -std::numeric_limits<key_type>::max ())
453  {}
454 
473  FillPairs (const pairs_view_type& pairs,
474  const counts_view_type& counts,
475  const offsets_view_type& ptr,
476  const keys_view_type& keys,
477  const typename pair_type::second_type startingValue,
478  const key_type initMinKey,
479  const key_type initMaxKey) :
480  pairs_ (pairs),
481  counts_ (counts),
482  ptr_ (ptr),
483  keys_ (keys),
484  size_ (counts.dimension_0 ()),
485  startingValue_ (startingValue),
486  initMinKey_ (initMinKey),
487  initMaxKey_ (initMaxKey)
488  {}
489 
491  KOKKOS_INLINE_FUNCTION void init (value_type& dst) const
492  {
493  dst.minKey_ = initMinKey_;
494  dst.maxKey_ = initMaxKey_;
495  dst.success_ = true;
496  }
497 
498  KOKKOS_INLINE_FUNCTION void
499  join (volatile value_type& dst,
500  const volatile value_type& src) const
501  {
502  if (src.maxKey_ > dst.maxKey_) {
503  dst.maxKey_ = src.maxKey_;
504  }
505  if (src.minKey_ < dst.minKey_) {
506  dst.minKey_ = src.minKey_;
507  }
508  dst.success_ = dst.success_ && src.success_;
509  }
510 
515  KOKKOS_INLINE_FUNCTION void
516  operator () (const size_type& i, value_type& dst) const
517  {
518  typedef typename hash_type::result_type hash_value_type;
519  typedef typename offsets_view_type::non_const_value_type offset_type;
520  typedef typename pair_type::second_type val_type;
521 
522  const key_type key = keys_[i];
523  if (key > dst.maxKey_) {
524  dst.maxKey_ = key;
525  }
526  if (key < dst.minKey_) {
527  dst.minKey_ = key;
528  }
529  const val_type theVal = startingValue_ + static_cast<val_type> (i);
530  const hash_value_type hashVal = hash_type::hashFunc (key, size_);
531 
532  // Return the old count; decrement afterwards.
533  const offset_type count = Kokkos::atomic_fetch_add (&counts_[hashVal], -1);
534  if (count == 0) {
535  dst.success_ = false; // FAILURE!
536  }
537  else {
538  const offset_type curPos = ptr_[hashVal+1] - count;
539 
540  pairs_[curPos].first = key;
541  pairs_[curPos].second = theVal;
542  }
543  }
544 
545 private:
546  pairs_view_type pairs_;
547  counts_view_type counts_;
548  offsets_view_type ptr_;
549  keys_view_type keys_;
550  size_type size_;
551  typename pair_type::second_type startingValue_;
553  key_type initMinKey_;
555  key_type initMaxKey_;
556 };
557 
581 template<class OffsetsViewType,
582  class PairsViewType,
583  class SizeType = typename OffsetsViewType::size_type>
585 public:
586  typedef typename OffsetsViewType::const_type offsets_view_type;
587  typedef typename PairsViewType::const_type pairs_view_type;
588  typedef typename offsets_view_type::execution_space execution_space;
589  typedef typename offsets_view_type::memory_space memory_space;
590  typedef SizeType size_type;
591 
592  // The result of the check is whether the table has one or more duplicates.
593  typedef bool value_type;
594 
599  CheckForDuplicateKeys (const pairs_view_type& pairs,
600  const offsets_view_type& ptr) :
601  pairs_ (pairs),
602  ptr_ (ptr),
603  size_ (ptr_.dimension_0 () == 0 ?
604  size_type (0) :
605  ptr_.dimension_0 () - 1)
606  {}
607 
609  KOKKOS_INLINE_FUNCTION void init (value_type& dst) const
610  {
611  dst = false;
612  }
613 
615  KOKKOS_INLINE_FUNCTION void
616  join (volatile value_type& dst,
617  const volatile value_type& src) const
618  {
619  dst = dst || src;
620  }
621 
623  KOKKOS_INLINE_FUNCTION void
624  operator () (const size_type& i, value_type& dst) const
625  {
626  typedef typename offsets_view_type::non_const_value_type offset_type;
627  typedef typename pairs_view_type::non_const_value_type pair_type;
628  typedef typename pair_type::first_type key_type;
629 
630  if (dst) {
631  return; // we've already found duplicate keys elsewhere
632  }
633  else {
634  const offset_type beg = ptr_[i];
635  const offset_type end = ptr_[i+1];
636  bool foundDuplicateKey = false;
637  // This is an ~ n^2 algorithm in the worst case, where n is the
638  // max number of keys that hash to the same bucket. However, if
639  // the hash function is reasonable, n should be much less than
640  // the total number of keys.
641  for (offset_type j = beg + 1; j < end; ++j) {
642  const key_type curKey = pairs_[j].first;
643  for (offset_type k = beg; k < j; ++k) {
644  if (pairs_[k].first == curKey) {
645  foundDuplicateKey = true;
646  break;
647  }
648  }
649  }
650  dst = dst || foundDuplicateKey;
651  }
652  }
653 
654 private:
655  pairs_view_type pairs_;
656  offsets_view_type ptr_;
657  size_type size_;
658 };
659 
660 } // namespace FHT
661 
662 //
663 // Here begins the actual implementation of FixedHashTable.
664 //
665 
666 template<class KeyType, class ValueType, class DeviceType>
667 bool
669 hasKeys () const {
670  // This also works if FixedHashTable has no entries. getKey()
671  // works in that case, but always returns the flag (invalid).
672  //
673  // FIXME (31 May 2015) This only works because vals_ contains no
674  // padding. If we ever pad within a "row" of vals_, we'll have to
675  // change this.
676  return keys_.dimension_0 () == val_.dimension_0 ();
677 }
678 
679 template<class KeyType, class ValueType, class DeviceType>
680 void
682 check () const
683 {
684  // const char prefix[] = "Tpetra::Details::FixedHashTable: ";
685  // const char suffix[] = " Please report this bug to the Tpetra developers.";
686 }
687 
688 template<class KeyType, class ValueType, class DeviceType>
691  minKey_ (std::numeric_limits<KeyType>::max ()),
692  maxKey_ (std::numeric_limits<KeyType>::is_integer ?
693  std::numeric_limits<KeyType>::min () :
694  -std::numeric_limits<KeyType>::max ()),
695  minVal_ (std::numeric_limits<ValueType>::max ()),
696  maxVal_ (std::numeric_limits<ValueType>::is_integer ?
697  std::numeric_limits<ValueType>::min () :
698  -std::numeric_limits<ValueType>::max ()),
699  firstContigKey_ (std::numeric_limits<KeyType>::max ()),
700  lastContigKey_ (std::numeric_limits<KeyType>::is_integer ?
701  std::numeric_limits<KeyType>::min () :
702  -std::numeric_limits<KeyType>::max ()),
703  contiguousValues_ (true), // trivially
704  checkedForDuplicateKeys_ (true), // it's an empty table; no need to check
705  hasDuplicateKeys_ (false)
706 {
707 #ifdef HAVE_TPETRA_DEBUG
708  check ();
709 #endif // HAVE_TPETRA_DEBUG
710 }
711 
712 template<class KeyType, class ValueType, class DeviceType>
714 FixedHashTable (const keys_type& keys) :
715  keys_ (keys),
716  minKey_ (std::numeric_limits<KeyType>::max ()), // to be set in init()
717  maxKey_ (std::numeric_limits<KeyType>::is_integer ?
718  std::numeric_limits<KeyType>::min () :
719  -std::numeric_limits<KeyType>::max ()), // to be set in init()
720  minVal_ (0),
721  maxVal_ (keys.size () == 0 ?
722  static_cast<ValueType> (0) :
723  static_cast<ValueType> (keys.size () - 1)),
724  firstContigKey_ (std::numeric_limits<KeyType>::max ()),
725  lastContigKey_ (std::numeric_limits<KeyType>::is_integer ?
726  std::numeric_limits<KeyType>::min () :
727  -std::numeric_limits<KeyType>::max ()),
728  contiguousValues_ (true),
729  checkedForDuplicateKeys_ (false),
730  hasDuplicateKeys_ (false) // to revise in hasDuplicateKeys()
731 {
732  const ValueType startingValue = static_cast<ValueType> (0);
733  const KeyType initMinKey = this->minKey_;
734  const KeyType initMaxKey = this->maxKey_;
735  this->init (keys, startingValue, initMinKey, initMaxKey,
736  initMinKey, initMinKey, false);
737 
738 #ifdef HAVE_TPETRA_DEBUG
739  check ();
740 #endif // HAVE_TPETRA_DEBUG
741 }
742 
743 template<class KeyType, class ValueType, class DeviceType>
745 FixedHashTable (const Teuchos::ArrayView<const KeyType>& keys,
746  const bool keepKeys) :
747  minKey_ (std::numeric_limits<KeyType>::max ()), // to be set in init()
748  maxKey_ (std::numeric_limits<KeyType>::is_integer ?
749  std::numeric_limits<KeyType>::min () :
750  -std::numeric_limits<KeyType>::max ()), // to be set in init()
751  minVal_ (0),
752  maxVal_ (keys.size () == 0 ?
753  static_cast<ValueType> (0) :
754  static_cast<ValueType> (keys.size () - 1)),
755  firstContigKey_ (std::numeric_limits<KeyType>::max ()),
756  lastContigKey_ (std::numeric_limits<KeyType>::is_integer ?
757  std::numeric_limits<KeyType>::min () :
758  -std::numeric_limits<KeyType>::max ()),
759  contiguousValues_ (true),
760  checkedForDuplicateKeys_ (false),
761  hasDuplicateKeys_ (false) // to revise in hasDuplicateKeys()
762 {
763  typedef typename keys_type::non_const_type nonconst_keys_type;
764 
765  // mfh 01 May 2015: I don't trust that
766  // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
767  // so I ensure this manually.
768  const ValueType startingValue = static_cast<ValueType> (0);
769  host_input_keys_type keys_k (keys.size () == 0 ? NULL : keys.getRawPtr (),
770  keys.size ());
771  using Kokkos::ViewAllocateWithoutInitializing;
772  nonconst_keys_type keys_d (ViewAllocateWithoutInitializing ("FixedHashTable::keys"),
773  keys_k.dimension_0 ());
774  Kokkos::deep_copy (keys_d, keys_k);
775  const KeyType initMinKey = this->minKey_;
776  const KeyType initMaxKey = this->maxKey_;
777  this->init (keys_d, startingValue, initMinKey, initMaxKey,
778  initMinKey, initMinKey, false);
779  if (keepKeys) {
780  keys_ = keys_d;
781 #ifdef HAVE_TPETRA_DEBUG
782  typedef typename keys_type::size_type size_type;
783  TEUCHOS_TEST_FOR_EXCEPTION
784  (keys_.dimension_0 () != static_cast<size_type> (keys.size ()),
785  std::logic_error, "Tpetra::Details::FixedHashTable constructor: "
786  "keepKeys is true, but on return, keys_.dimension_0() = " <<
787  keys_.dimension_0 () << " != keys.size() = " << keys.size () <<
788  ". Please report this bug to the Tpetra developers.");
789 #endif // HAVE_TPETRA_DEBUG
790  }
791 
792 #ifdef HAVE_TPETRA_DEBUG
793  check ();
794 #endif // HAVE_TPETRA_DEBUG
795 }
796 
797 template<class KeyType, class ValueType, class DeviceType>
799 FixedHashTable (const Teuchos::ArrayView<const KeyType>& keys,
800  const ValueType startingValue,
801  const bool keepKeys) :
802  minKey_ (std::numeric_limits<KeyType>::max ()),
803  maxKey_ (std::numeric_limits<KeyType>::is_integer ?
804  std::numeric_limits<KeyType>::min () :
805  -std::numeric_limits<KeyType>::max ()),
806  minVal_ (startingValue),
807  maxVal_ (keys.size () == 0 ?
808  startingValue :
809  static_cast<ValueType> (startingValue + keys.size () - 1)),
810  firstContigKey_ (std::numeric_limits<KeyType>::max ()),
811  lastContigKey_ (std::numeric_limits<KeyType>::is_integer ?
812  std::numeric_limits<KeyType>::min () :
813  -std::numeric_limits<KeyType>::max ()),
814  contiguousValues_ (true),
815  checkedForDuplicateKeys_ (false),
816  hasDuplicateKeys_ (false) // to revise in hasDuplicateKeys()
817 {
818  typedef typename keys_type::non_const_type nonconst_keys_type;
819 
820  // mfh 01 May 2015: I don't trust that
821  // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
822  // so I ensure this manually.
823  host_input_keys_type keys_k (keys.size () == 0 ? NULL : keys.getRawPtr (),
824  keys.size ());
825  using Kokkos::ViewAllocateWithoutInitializing;
826  nonconst_keys_type keys_d (ViewAllocateWithoutInitializing ("FixedHashTable::keys"),
827  keys_k.dimension_0 ());
828  Kokkos::deep_copy (keys_d, keys_k);
829 
830  const KeyType initMinKey = std::numeric_limits<KeyType>::max ();
831  // min() for a floating-point type returns the minimum _positive_
832  // normalized value. This is different than for integer types.
833  // lowest() is new in C++11 and returns the least value, always
834  // negative for signed finite types.
835  //
836  // mfh 23 May 2015: I have heard reports that
837  // std::numeric_limits<int>::lowest() does not exist with the Intel
838  // compiler. I'm not sure if the users in question actually enabled
839  // C++11. However, it's easy enough to work around this issue. The
840  // standard floating-point types are signed and have a sign bit, so
841  // lowest() is just -max(). For integer types, we can use min()
842  // instead.
843  const KeyType initMaxKey = std::numeric_limits<KeyType>::is_integer ?
844  std::numeric_limits<KeyType>::min () :
845  -std::numeric_limits<KeyType>::max ();
846  this->init (keys_d, startingValue, initMinKey, initMaxKey,
847  initMinKey, initMinKey, false);
848  if (keepKeys) {
849  keys_ = keys_d;
850 #ifdef HAVE_TPETRA_DEBUG
851  typedef typename keys_type::size_type size_type;
852  TEUCHOS_TEST_FOR_EXCEPTION
853  (keys_.dimension_0 () != static_cast<size_type> (keys.size ()),
854  std::logic_error, "Tpetra::Details::FixedHashTable constructor: "
855  "keepKeys is true, but on return, keys_.dimension_0() = " <<
856  keys_.dimension_0 () << " != keys.size() = " << keys.size () <<
857  ". Please report this bug to the Tpetra developers.");
858 #endif // HAVE_TPETRA_DEBUG
859  }
860 
861 #ifdef HAVE_TPETRA_DEBUG
862  check ();
863 #endif // HAVE_TPETRA_DEBUG
864 }
865 
866 template<class KeyType, class ValueType, class DeviceType>
868 FixedHashTable (const Teuchos::ArrayView<const KeyType>& keys,
869  const KeyType firstContigKey,
870  const KeyType lastContigKey,
871  const ValueType startingValue,
872  const bool keepKeys) :
873  minKey_ (std::numeric_limits<KeyType>::max ()),
874  maxKey_ (std::numeric_limits<KeyType>::is_integer ?
875  std::numeric_limits<KeyType>::min () :
876  -std::numeric_limits<KeyType>::max ()),
877  minVal_ (startingValue),
878  maxVal_ (keys.size () == 0 ?
879  startingValue :
880  static_cast<ValueType> (startingValue + keys.size () - 1)),
881  firstContigKey_ (firstContigKey),
882  lastContigKey_ (lastContigKey),
883  contiguousValues_ (true),
884  checkedForDuplicateKeys_ (false),
885  hasDuplicateKeys_ (false) // to revise in hasDuplicateKeys()
886 {
887  typedef typename keys_type::non_const_type nonconst_keys_type;
888 
889  // mfh 01 May 2015: I don't trust that
890  // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
891  // so I ensure this manually.
892  host_input_keys_type keys_k (keys.size () == 0 ? NULL : keys.getRawPtr (),
893  keys.size ());
894  using Kokkos::ViewAllocateWithoutInitializing;
895  nonconst_keys_type keys_d (ViewAllocateWithoutInitializing ("FixedHashTable::keys"),
896  keys_k.dimension_0 ());
897  Kokkos::deep_copy (keys_d, keys_k);
898 
899  const KeyType initMinKey = std::numeric_limits<KeyType>::max ();
900  // min() for a floating-point type returns the minimum _positive_
901  // normalized value. This is different than for integer types.
902  // lowest() is new in C++11 and returns the least value, always
903  // negative for signed finite types.
904  //
905  // mfh 23 May 2015: I have heard reports that
906  // std::numeric_limits<int>::lowest() does not exist with the Intel
907  // compiler. I'm not sure if the users in question actually enabled
908  // C++11. However, it's easy enough to work around this issue. The
909  // standard floating-point types are signed and have a sign bit, so
910  // lowest() is just -max(). For integer types, we can use min()
911  // instead.
912  const KeyType initMaxKey = std::numeric_limits<KeyType>::is_integer ?
913  std::numeric_limits<KeyType>::min () :
914  -std::numeric_limits<KeyType>::max ();
915  this->init (keys_d, startingValue, initMinKey, initMaxKey,
916  firstContigKey, lastContigKey, true);
917  if (keepKeys) {
918  keys_ = keys_d;
919 #ifdef HAVE_TPETRA_DEBUG
920  typedef typename keys_type::size_type size_type;
921  TEUCHOS_TEST_FOR_EXCEPTION
922  (keys_.dimension_0 () != static_cast<size_type> (keys.size ()),
923  std::logic_error, "Tpetra::Details::FixedHashTable constructor: "
924  "keepKeys is true, but on return, keys_.dimension_0() = " <<
925  keys_.dimension_0 () << " != keys.size() = " << keys.size () <<
926  ". Please report this bug to the Tpetra developers.");
927 #endif // HAVE_TPETRA_DEBUG
928  }
929 
930 #ifdef HAVE_TPETRA_DEBUG
931  check ();
932 #endif // HAVE_TPETRA_DEBUG
933 }
934 
935 template<class KeyType, class ValueType, class DeviceType>
938  const ValueType startingValue) :
939  keys_ (keys),
940  minKey_ (std::numeric_limits<KeyType>::max ()),
941  maxKey_ (std::numeric_limits<KeyType>::is_integer ?
942  std::numeric_limits<KeyType>::min () :
943  -std::numeric_limits<KeyType>::max ()),
944  minVal_ (startingValue),
945  maxVal_ (keys.size () == 0 ?
946  startingValue :
947  static_cast<ValueType> (startingValue + keys.size () - 1)),
948  firstContigKey_ (std::numeric_limits<KeyType>::max ()),
949  lastContigKey_ (std::numeric_limits<KeyType>::is_integer ?
950  std::numeric_limits<KeyType>::min () :
951  -std::numeric_limits<KeyType>::max ()),
952  contiguousValues_ (true),
953  checkedForDuplicateKeys_ (false),
954  hasDuplicateKeys_ (false) // to revise in hasDuplicateKeys()
955 {
956  const KeyType initMinKey = std::numeric_limits<KeyType>::max ();
957  // min() for a floating-point type returns the minimum _positive_
958  // normalized value. This is different than for integer types.
959  // lowest() is new in C++11 and returns the least value, always
960  // negative for signed finite types.
961  //
962  // mfh 23 May 2015: I have heard reports that
963  // std::numeric_limits<int>::lowest() does not exist with the Intel
964  // compiler. I'm not sure if the users in question actually enabled
965  // C++11. However, it's easy enough to work around this issue. The
966  // standard floating-point types are signed and have a sign bit, so
967  // lowest() is just -max(). For integer types, we can use min()
968  // instead.
969  const KeyType initMaxKey = std::numeric_limits<KeyType>::is_integer ?
970  std::numeric_limits<KeyType>::min () :
971  -std::numeric_limits<KeyType>::max ();
972  this->init (keys, startingValue, initMinKey, initMaxKey,
973  initMinKey, initMinKey, false);
974 
975 #ifdef HAVE_TPETRA_DEBUG
976  check ();
977 #endif // HAVE_TPETRA_DEBUG
978 }
979 
980 template<class KeyType, class ValueType, class DeviceType>
982 FixedHashTable (const Teuchos::ArrayView<const KeyType>& keys,
983  const Teuchos::ArrayView<const ValueType>& vals) :
984  minKey_ (std::numeric_limits<KeyType>::max ()),
985  maxKey_ (std::numeric_limits<KeyType>::is_integer ?
986  std::numeric_limits<KeyType>::min () :
987  -std::numeric_limits<KeyType>::max ()),
988  minVal_ (std::numeric_limits<ValueType>::max ()),
989  maxVal_ (std::numeric_limits<ValueType>::is_integer ?
990  std::numeric_limits<ValueType>::min () :
991  -std::numeric_limits<ValueType>::max ()),
992  firstContigKey_ (std::numeric_limits<KeyType>::max ()),
993  lastContigKey_ (std::numeric_limits<KeyType>::is_integer ?
994  std::numeric_limits<KeyType>::min () :
995  -std::numeric_limits<KeyType>::max ()),
996  contiguousValues_ (false),
997  checkedForDuplicateKeys_ (false),
998  hasDuplicateKeys_ (false) // to revise in hasDuplicateKeys()
999 {
1000  // mfh 01 May 2015: I don't trust that
1001  // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
1002  // so I ensure this manually.
1003  host_input_keys_type keys_k (keys.size () == 0 ? NULL : keys.getRawPtr (),
1004  keys.size ());
1005  host_input_vals_type vals_k (vals.size () == 0 ? NULL : vals.getRawPtr (),
1006  vals.size ());
1007  const KeyType initMinKey = std::numeric_limits<KeyType>::max ();
1008  // min() for a floating-point type returns the minimum _positive_
1009  // normalized value. This is different than for integer types.
1010  // lowest() is new in C++11 and returns the least value, always
1011  // negative for signed finite types.
1012  //
1013  // mfh 23 May 2015: I have heard reports that
1014  // std::numeric_limits<int>::lowest() does not exist with the Intel
1015  // compiler. I'm not sure if the users in question actually enabled
1016  // C++11. However, it's easy enough to work around this issue. The
1017  // standard floating-point types are signed and have a sign bit, so
1018  // lowest() is just -max(). For integer types, we can use min()
1019  // instead.
1020  const KeyType initMaxKey = std::numeric_limits<KeyType>::is_integer ?
1021  std::numeric_limits<KeyType>::min () :
1022  -std::numeric_limits<KeyType>::max ();
1023  this->init (keys_k, vals_k, initMinKey, initMaxKey);
1024 
1025 #ifdef HAVE_TPETRA_DEBUG
1026  check ();
1027 #endif // HAVE_TPETRA_DEBUG
1028 }
1029 
1030 template<class KeyType, class ValueType, class DeviceType>
1031 void
1033 init (const keys_type& keys,
1034  ValueType startingValue,
1035  KeyType initMinKey,
1036  KeyType initMaxKey,
1037  KeyType firstContigKey,
1038  KeyType lastContigKey,
1039  const bool computeInitContigKeys)
1040 {
1041  using Kokkos::subview;
1042 
1043  const offset_type numKeys = static_cast<offset_type> (keys.dimension_0 ());
1044  TEUCHOS_TEST_FOR_EXCEPTION
1045  (static_cast<unsigned long long> (numKeys) >
1046  static_cast<unsigned long long> (std::numeric_limits<ValueType>::max ()),
1047  std::invalid_argument, "Tpetra::Details::FixedHashTable: The number of "
1048  "keys " << numKeys << " is greater than the maximum representable "
1049  "ValueType value " << std::numeric_limits<ValueType>::max () << ". "
1050  "This means that it is not possible to use this constructor.");
1051  TEUCHOS_TEST_FOR_EXCEPTION
1052  (numKeys > static_cast<offset_type> (INT_MAX), std::logic_error, "Tpetra::"
1053  "Details::FixedHashTable: This class currently only works when the number "
1054  "of keys is <= INT_MAX = " << INT_MAX << ". If this is a problem for you"
1055  ", please talk to the Tpetra developers.");
1056 
1057  const bool buildInParallel =
1058  FHT::worthBuildingFixedHashTableInParallel<execution_space> ();
1059 
1060  // NOTE (mfh 14 May 2015) This method currently assumes UVM. We
1061  // could change that by setting up ptr and val as Kokkos::DualView
1062  // instances. If we do that, since we are filling on host for now,
1063  // we want to make sure that we only zero-fill ptr on host
1064  // initially, and that we don't fill val at all. Once we finish
1065  // Kokkos-izing all the set-up kernels, we won't need DualView for
1066  // either ptr or val.
1067 
1068  if (computeInitContigKeys) {
1069  // Find the first and last initial contiguous keys. If we find a
1070  // long sequence of initial contiguous keys, we can save space by
1071  // not storing them explicitly as pairs in the hash table.
1072  //
1073  // NOTE (mfh 01 Jun 2015) Doing this in parallel requires a scan
1074  // ("min index such that the difference between the current key and
1075  // the next != 1"), which takes multiple passes over the data. We
1076  // could fuse it with CountBuckets (only update counts on 'final'
1077  // pass). However, we're really just moving this sequential search
1078  // out of Map's constructor here, so there is no loss in doing it
1079  // sequentially for now. Later, we can work on parallelization.
1080  //
1081  // FIXME (mfh 01 Jun 2015) This assumes UVM.
1082  if (numKeys > 0) {
1083  firstContigKey_ = keys[0];
1084  // Start with one plus, then decrement at the end. That lets us do
1085  // only one addition per loop iteration, rather than two (if we test
1086  // against lastContigKey + 1 and then increment lastContigKey).
1087  lastContigKey_ = firstContigKey_ + 1;
1088 
1089  // We will only store keys in the table that are not part of the
1090  // initial contiguous sequence. It's possible for the initial
1091  // contiguous sequence to be trivial, which for a nonzero number of
1092  // keys means that the "sequence" has length 1.
1093  for (offset_type k = 1; k < numKeys; ++k) {
1094  if (lastContigKey_ != keys[k]) {
1095  break;
1096  }
1097  ++lastContigKey_;
1098  }
1099  --lastContigKey_;
1100  }
1101  }
1102  else {
1103  firstContigKey_ = firstContigKey;
1104  lastContigKey_ = lastContigKey;
1105  }
1106 
1107  offset_type startIndex;
1108  if (numKeys > 0) {
1109  initMinKey = std::min (initMinKey, firstContigKey_);
1110  initMaxKey = std::max (initMaxKey, lastContigKey_);
1111  startIndex = static_cast<offset_type> (lastContigKey_ - firstContigKey_);
1112  } else {
1113  startIndex = 0;
1114  }
1115 
1116  const offset_type theNumKeys = numKeys - startIndex;
1117  const offset_type size = hash_type::getRecommendedSize (theNumKeys);
1118 #ifdef HAVE_TPETRA_DEBUG
1119  TEUCHOS_TEST_FOR_EXCEPTION(
1120  size == 0 && numKeys != 0, std::logic_error,
1121  "Tpetra::Details::FixedHashTable constructor: "
1122  "getRecommendedSize(" << numKeys << ") returned zero, "
1123  "even though the number of keys " << numKeys << " is nonzero. "
1124  "Please report this bug to the Tpetra developers.");
1125 #endif // HAVE_TPETRA_DEBUG
1126  keys_type theKeys =
1127  subview (keys, std::pair<offset_type, offset_type> (startIndex, numKeys));
1128 
1129  // The array of counts must be separate from the array of offsets,
1130  // in order for parallel_scan to work correctly.
1131  typedef typename ptr_type::non_const_type counts_type;
1132  counts_type counts ("FixedHashTable::counts", size);
1133 
1134  //
1135  // Count the number of "buckets" per offsets array (ptr) entry.
1136  //
1137 
1138  // The Kokkos kernel uses atomic update instructions to count the
1139  // number of "buckets" per offsets array (ptr) entry. Atomic
1140  // updates incur overhead, even in the sequential case. The Kokkos
1141  // kernel is still correct in that case, but I would rather not
1142  // incur overhead then.
1143  if (buildInParallel) {
1144  FHT::CountBuckets<counts_type, keys_type> functor (counts, theKeys, size);
1145  Kokkos::parallel_for (theNumKeys, functor);
1146  }
1147  else {
1148  for (offset_type k = 0; k < theNumKeys; ++k) {
1149  typedef typename hash_type::result_type hash_value_type;
1150 
1151  const hash_value_type hashVal = hash_type::hashFunc (theKeys[k], size);
1152  ++counts[hashVal];
1153  }
1154  }
1155 
1156  // Kokkos::View fills with zeros by default.
1157  typename ptr_type::non_const_type ptr ("FixedHashTable::ptr", size + 1);
1158 
1159  // Compute row offsets via prefix sum:
1160  //
1161  // ptr[i+1] = \sum_{j=0}^{i} counts[j].
1162  //
1163  // Thus, ptr[i+1] - ptr[i] = counts[i], so that ptr[i+1] = ptr[i] +
1164  // counts[i]. If we stored counts[i] in ptr[i+1] on input, then the
1165  // formula is ptr[i+1] += ptr[i].
1166  //
1167  // parallel_scan does not incur overhead with Kokkos::Serial, but
1168  // with actual parallel execution spaces, it does require multiple
1169  // passes over the data. Thus, it still makes sense to have a
1170  // sequential fall-back.
1171  if (buildInParallel) {
1173  functor_type functor (ptr, counts);
1174  Kokkos::parallel_scan (size+1, functor);
1175  }
1176  else {
1177  for (offset_type i = 0; i < size; ++i) {
1178  //ptr[i+1] += ptr[i];
1179  ptr[i+1] = ptr[i] + counts[i];
1180  }
1181  //ptr[0] = 0; // We've already done this when initializing ptr above.
1182  }
1183 
1184  // Allocate the array of (key,value) pairs. Don't fill it with
1185  // zeros, because we will fill it with actual data below.
1186  using Kokkos::ViewAllocateWithoutInitializing;
1187  typedef typename val_type::non_const_type nonconst_val_type;
1188  nonconst_val_type val (ViewAllocateWithoutInitializing ("FixedHashTable::pairs"),
1189  theNumKeys);
1190 
1191  // Fill in the hash table's "values" (the (key,value) pairs).
1192  typedef FHT::FillPairs<typename val_type::non_const_type, keys_type,
1193  typename ptr_type::non_const_type> functor_type;
1194  typename functor_type::value_type result (initMinKey, initMaxKey);
1195 
1196  const ValueType newStartingValue = startingValue + static_cast<ValueType> (startIndex);
1197  if (buildInParallel) {
1198  functor_type functor (val, counts, ptr, theKeys, newStartingValue,
1199  initMinKey, initMaxKey);
1200  Kokkos::parallel_reduce (theNumKeys, functor, result);
1201  }
1202  else {
1203  for (offset_type k = 0; k < theNumKeys; ++k) {
1204  typedef typename hash_type::result_type hash_value_type;
1205  const KeyType key = theKeys[k];
1206  if (key > result.maxKey_) {
1207  result.maxKey_ = key;
1208  }
1209  if (key < result.minKey_) {
1210  result.minKey_ = key;
1211  }
1212  const ValueType theVal = newStartingValue + static_cast<ValueType> (k);
1213  const hash_value_type hashVal = hash_type::hashFunc (key, size);
1214 
1215  // Return the old count; decrement afterwards.
1216  const offset_type count = counts[hashVal];
1217  --counts[hashVal];
1218  if (count == 0) {
1219  result.success_ = false; // FAILURE!
1220  break;
1221  }
1222  else {
1223  const offset_type curPos = ptr[hashVal+1] - count;
1224 
1225  val[curPos].first = key;
1226  val[curPos].second = theVal;
1227  }
1228  }
1229  }
1230 
1231  // FIXME (mfh 01 Jun 2015) Temporarily commented out because of
1232  // reports of exceptions being thrown in Albany.
1233  //
1234  // TEUCHOS_TEST_FOR_EXCEPTION
1235  // (! result.success_, std::logic_error, "Tpetra::Details::FixedHashTable::"
1236  // "init: Filling the hash table failed! Please report this bug to the "
1237  // "Tpetra developers.");
1238  (void) result; // prevent build warning (set but unused variable)
1239 
1240  // "Commit" the computed arrays and other computed quantities.
1241  ptr_ = ptr;
1242  val_ = val;
1243  minKey_ = result.minKey_;
1244  maxKey_ = result.maxKey_;
1245  // We've already set firstContigKey_ and lastContigKey_ above.
1246 }
1247 
1248 
1249 template<class KeyType, class ValueType, class DeviceType>
1250 void
1252 init (const host_input_keys_type& keys,
1253  const host_input_vals_type& vals,
1254  KeyType initMinKey,
1255  KeyType initMaxKey)
1256 {
1257  const offset_type numKeys = static_cast<offset_type> (keys.dimension_0 ());
1258  TEUCHOS_TEST_FOR_EXCEPTION
1259  (static_cast<unsigned long long> (numKeys) > static_cast<unsigned long long> (std::numeric_limits<ValueType>::max ()),
1260  std::invalid_argument, "Tpetra::Details::FixedHashTable: The number of "
1261  "keys " << numKeys << " is greater than the maximum representable "
1262  "ValueType value " << std::numeric_limits<ValueType>::max () << ".");
1263  TEUCHOS_TEST_FOR_EXCEPTION
1264  (numKeys > static_cast<offset_type> (INT_MAX), std::logic_error, "Tpetra::"
1265  "Details::FixedHashTable: This class currently only works when the number "
1266  "of keys is <= INT_MAX = " << INT_MAX << ". If this is a problem for you"
1267  ", please talk to the Tpetra developers.");
1268 
1269  // There's no need to find the first and last initial contiguous
1270  // keys in this case, because if we reach this init() function, then
1271  // hasContiguousValues() is false and so get() doesn't use the
1272  // initial contiguous sequence of keys.
1273 
1274  const offset_type size = hash_type::getRecommendedSize (numKeys);
1275 #ifdef HAVE_TPETRA_DEBUG
1276  TEUCHOS_TEST_FOR_EXCEPTION(
1277  size == 0 && numKeys != 0, std::logic_error,
1278  "Tpetra::Details::FixedHashTable constructor: "
1279  "getRecommendedSize(" << numKeys << ") returned zero, "
1280  "even though the number of keys " << numKeys << " is nonzero. "
1281  "Please report this bug to the Tpetra developers.");
1282 #endif // HAVE_TPETRA_DEBUG
1283 
1284  // NOTE (mfh 14 May 2015) This method currently assumes UVM. We
1285  // could change that by setting up ptr and val as Kokkos::DualView
1286  // instances. If we do that, since we are filling on host for now,
1287  // we want to make sure that we only zero-fill ptr on host
1288  // initially, and that we don't fill val at all. Once we finish
1289  // Kokkos-izing all the set-up kernels, we won't need DualView for
1290  // either ptr or val.
1291 
1292  typename ptr_type::non_const_type ptr ("FixedHashTable::ptr", size + 1);
1293 
1294  // Allocate the array of key,value pairs. Don't waste time filling
1295  // it with zeros, because we will fill it with actual data below.
1296  using Kokkos::ViewAllocateWithoutInitializing;
1297  typedef typename val_type::non_const_type nonconst_val_type;
1298  nonconst_val_type val (ViewAllocateWithoutInitializing ("FixedHashTable::pairs"),
1299  numKeys);
1300 
1301  // Compute number of entries in each hash table position.
1302  for (offset_type k = 0; k < numKeys; ++k) {
1303  const typename hash_type::result_type hashVal =
1304  hash_type::hashFunc (keys[k], size);
1305  // Shift over one, so that counts[j] = ptr[j+1]. See below.
1306  ++ptr[hashVal+1];
1307  }
1308 
1309  // Compute row offsets via prefix sum:
1310  //
1311  // ptr[i+1] = \sum_{j=0}^{i} counts[j].
1312  //
1313  // Thus, ptr[i+1] - ptr[i] = counts[i], so that ptr[i+1] = ptr[i] +
1314  // counts[i]. If we stored counts[i] in ptr[i+1] on input, then the
1315  // formula is ptr[i+1] += ptr[i].
1316  for (offset_type i = 0; i < size; ++i) {
1317  ptr[i+1] += ptr[i];
1318  }
1319  //ptr[0] = 0; // We've already done this when initializing ptr above.
1320 
1321  // curRowStart[i] is the offset of the next element in row i.
1322  typename ptr_type::non_const_type curRowStart ("FixedHashTable::curRowStart", size);
1323 
1324  // Fill in the hash table.
1325  FHT::FillPairsResult<KeyType> result (initMinKey, initMaxKey);
1326  for (offset_type k = 0; k < numKeys; ++k) {
1327  typedef typename hash_type::result_type hash_value_type;
1328  const KeyType key = keys[k];
1329  if (key > result.maxKey_) {
1330  result.maxKey_ = key;
1331  }
1332  if (key < result.minKey_) {
1333  result.minKey_ = key;
1334  }
1335  const ValueType theVal = vals[k];
1336  if (theVal > maxVal_) {
1337  maxVal_ = theVal;
1338  }
1339  if (theVal < minVal_) {
1340  minVal_ = theVal;
1341  }
1342  const hash_value_type hashVal = hash_type::hashFunc (key, size);
1343 
1344  const offset_type offset = curRowStart[hashVal];
1345  const offset_type curPos = ptr[hashVal] + offset;
1346  if (curPos >= ptr[hashVal+1]) {
1347  result.success_ = false; // FAILURE!
1348  }
1349  else {
1350  val[curPos].first = key;
1351  val[curPos].second = theVal;
1352  ++curRowStart[hashVal];
1353  }
1354  }
1355 
1356  TEUCHOS_TEST_FOR_EXCEPTION
1357  (! result.success_, std::logic_error, "Tpetra::Details::FixedHashTable::"
1358  "init: Filling the hash table failed! Please report this bug to the "
1359  "Tpetra developers.");
1360 
1361  // "Commit" the computed arrays and other computed quantities.
1362  ptr_ = ptr;
1363  val_ = val;
1364  minKey_ = result.minKey_;
1365  maxKey_ = result.maxKey_;
1366  // We've already assigned to minVal_ and maxVal_ above.
1367 }
1368 
1369 template <class KeyType, class ValueType, class DeviceType>
1370 bool
1373 {
1374  if (! checkedForDuplicateKeys_) {
1375  hasDuplicateKeys_ = checkForDuplicateKeys ();
1376  checkedForDuplicateKeys_ = true;
1377  }
1378  return hasDuplicateKeys_;
1379 }
1380 
1381 template <class KeyType, class ValueType, class DeviceType>
1382 bool
1384 checkForDuplicateKeys () const
1385 {
1386  const offset_type size = this->getSize ();
1387  if (size == 0) {
1388  return false;
1389  }
1390  else {
1392  functor_type functor (val_, ptr_);
1393  bool hasDupKeys = false;
1394  Kokkos::parallel_reduce (size, functor, hasDupKeys);
1395  return hasDupKeys;
1396  }
1397 }
1398 
1399 template <class KeyType, class ValueType, class DeviceType>
1400 std::string
1402 description () const
1403 {
1404  std::ostringstream oss;
1405  oss << "FixedHashTable<"
1406  << Teuchos::TypeNameTraits<KeyType>::name () << ","
1407  << Teuchos::TypeNameTraits<ValueType>::name () << ">: "
1408  << "{ numKeys: " << val_.dimension_0 ()
1409  << ", tableSize: " << this->getSize () << " }";
1410  return oss.str();
1411 }
1412 
1413 template <class KeyType, class ValueType, class DeviceType>
1414 void
1416 describe (Teuchos::FancyOStream& out,
1417  const Teuchos::EVerbosityLevel verbLevel) const
1418 {
1419  using std::endl;
1420  using std::setw;
1421  using Teuchos::OSTab;
1422  using Teuchos::rcpFromRef;
1423  using Teuchos::TypeNameTraits;
1424  using Teuchos::VERB_DEFAULT;
1425  using Teuchos::VERB_NONE;
1426  using Teuchos::VERB_LOW;
1427  using Teuchos::VERB_EXTREME;
1428 
1429  // NOTE (mfh 14 May 2015) This method currently assumes UVM for
1430  // access to ptr_ and val_ from the host.
1431 
1432  Teuchos::EVerbosityLevel vl = verbLevel;
1433  if (vl == VERB_DEFAULT) vl = VERB_LOW;
1434 
1435  if (vl == VERB_NONE) {
1436  // do nothing
1437  }
1438  else if (vl == VERB_LOW) {
1439  out << this->description() << endl;
1440  }
1441  else { // MEDIUM, HIGH or EXTREME
1442  out << "FixedHashTable:" << endl;
1443  {
1444  OSTab tab1 (rcpFromRef (out));
1445 
1446  const std::string label = this->getObjectLabel ();
1447  if (label != "") {
1448  out << "label: " << label << endl;
1449  }
1450  out << "Template parameters:" << endl;
1451  {
1452  OSTab tab2 (rcpFromRef (out));
1453  out << "KeyType: " << TypeNameTraits<KeyType>::name () << endl
1454  << "ValueType: " << TypeNameTraits<ValueType>::name () << endl;
1455  }
1456 
1457  const offset_type tableSize = this->getSize ();
1458  const offset_type numKeys = val_.dimension_0 ();
1459 
1460  out << "Table parameters:" << endl;
1461  {
1462  OSTab tab2 (rcpFromRef (out));
1463  out << "numKeys: " << numKeys << endl
1464  << "tableSize: " << tableSize << endl;
1465  }
1466 
1467  if (vl >= VERB_EXTREME) {
1468  out << "Contents: ";
1469  if (tableSize == 0 || numKeys == 0) {
1470  out << "[]" << endl;
1471  } else {
1472  out << "[ " << endl;
1473  {
1474  OSTab tab2 (rcpFromRef (out));
1475  for (offset_type i = 0; i < tableSize; ++i) {
1476  OSTab tab3 (rcpFromRef (out));
1477  out << "[";
1478  for (offset_type k = ptr_[i]; k < ptr_[i+1]; ++k) {
1479  out << "(" << val_[k].first << "," << val_[k].second << ")";
1480  if (k + 1 < ptr_[i+1]) {
1481  out << ", ";
1482  }
1483  }
1484  out << "]" << endl;
1485  } // for each table position i
1486  }
1487  out << "]" << endl;
1488  } // The table contains entries
1489  } // vl >= VERB_EXTREME
1490  }
1491  out << endl;
1492  } // if vl > VERB_LOW
1493 }
1494 
1495 } // namespace Details
1496 } // namespace Tpetra
1497 
1498 // Macro that explicitly instantiates FixedHashTable for the given local
1499 // ordinal (LO) and global ordinal (GO) types. Note that FixedHashTable's
1500 // template parameters occur in the opposite order of most Tpetra
1501 // classes. This is because FixedHashTable performs global-to-local
1502 // lookup, and the convention in templated C++ lookup tables (such as
1503 // std::map) is <KeyType, ValueType>.
1504 //
1505 // This macro must be explanded within the Tpetra::Details namespace.
1506 #define TPETRA_DETAILS_FIXEDHASHTABLE_INSTANT_DEFAULTNODE(LO,GO) \
1507  template class FixedHashTable< GO , LO >;
1508 
1509 // Macro that explicitly instantiates FixedHashTable for the given
1510 // local ordinal (LO), global ordinal (GO), and Kokkos device (DEVICE)
1511 // types. Note that FixedHashTable's first two template parameters
1512 // occur in the opposite order of most Tpetra classes. This is
1513 // because FixedHashTable performs global-to-local lookup, and the
1514 // convention in templated C++ lookup tables (such as std::map) is
1515 // <KeyType, ValueType>.
1516 //
1517 // This macro must be explanded within the Tpetra::Details namespace.
1518 #define TPETRA_DETAILS_FIXEDHASHTABLE_INSTANT(LO, GO, DEVICE) \
1519  template class FixedHashTable< GO , LO , DEVICE >;
1520 
1521 #endif // TPETRA_DETAILS_FIXEDHASHTABLE_DEF_HPP
CheckForDuplicateKeys(const pairs_view_type &pairs, const offsets_view_type &ptr)
Constructor.
Parallel reduce functor for filling the FixedHashTable, and computing the min and max keys...
FillPairs(const pairs_view_type &pairs, const counts_view_type &counts, const offsets_view_type &ptr, const keys_view_type &keys, const typename pair_type::second_type startingValue, const key_type initMinKey, const key_type initMaxKey)
Constructor that takes initial min and max key values.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
KOKKOS_INLINE_FUNCTION void init(value_type &dst) const
Set the initial value of the reduction result.
KOKKOS_INLINE_FUNCTION void join(volatile value_type &dst, const volatile value_type &src) const
Combine two intermediate reduction results.
static result_type getRecommendedSize(const offset_type size)
Number of "buckets" that the constructor of FixedHashTable should allocate.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print this object with the given verbosity to the output stream.
ResultType result_type
Type of the return value of the hash function.
Parallel for functor for counting "buckets" in the FixedHashTable.
void deep_copy(const LittleBlock< ST2, LO > &dst, const LittleBlock< ST1, LO > &src, typename std::enable_if< std::is_convertible< ST1, ST2 >::value &&!std::is_const< ST2 >::value, int >::type *=NULL)
Copy the LittleBlock src into the LittleBlock dst.
Parallel scan functor for computing "row" offsets.
KOKKOS_INLINE_FUNCTION void init(value_type &dst) const
Set the initial value of the reduction result.
bool hasKeys() const
Whether it is safe to call getKey().
bool success_
Whether fill succeeded (it can only fail on a bug)
Implementation details of Tpetra.
The hash function for FixedHashTable.
std::string description() const
Implementation of Teuchos::Describable.
FixedHashTable()
Default constructor; makes an empty table.
FillPairs(const pairs_view_type &pairs, const counts_view_type &counts, const offsets_view_type &ptr, const keys_view_type &keys, const typename pair_type::second_type startingValue)
Constructor.
bool hasDuplicateKeys()
Whether the table has any duplicate keys.
static KOKKOS_INLINE_FUNCTION result_type hashFunc(const argument_type &key, const offset_type &size)
The hash function.
CountBuckets(const counts_view_type &counts, const keys_view_type &keys, const size_type size)
Constructor.
ComputeRowOffsets(const offsets_view_type &offsets, const counts_view_type &counts)
Constructor.
KOKKOS_INLINE_FUNCTION void init(value_type &dst) const
Set the initial value of the reduction result.
Functor for checking whether a FixedHashTable has one or more duplicate entries.
Reduction result for FillPairs functor below.
Kokkos::View< const KeyType *, Kokkos::LayoutLeft, device_type > keys_type
Type of a 1-D Kokkos::View (array) used to store keys.