Tpetra_ComputeGatherMap.hpp
// @HEADER
// ***********************************************************************
//
// Tpetra: Templated Linear Algebra Services Package
// Copyright (2008) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
//
// ************************************************************************
// @HEADER

#ifndef __Tpetra_ComputeGatherMap_hpp
#define __Tpetra_ComputeGatherMap_hpp

#include "Tpetra_Map.hpp"

// Macro that marks a function as "possibly unused," in order to
// suppress build warnings.
#if ! defined(TRILINOS_UNUSED_FUNCTION)
#  if defined(__GNUC__) || (defined(__INTEL_COMPILER) && !defined(_MSC_VER))
#    define TRILINOS_UNUSED_FUNCTION __attribute__((__unused__))
#  elif defined(__clang__)
#    if __has_attribute(unused)
#      define TRILINOS_UNUSED_FUNCTION __attribute__((__unused__))
#    else
#      define TRILINOS_UNUSED_FUNCTION
#    endif // Clang has 'unused' attribute
#  elif defined(__IBMCPP__)
// IBM's C++ compiler for Blue Gene/Q (V12.1) implements 'used' but not 'unused'.
//
// http://pic.dhe.ibm.com/infocenter/compbg/v121v141/index.jsp
#    define TRILINOS_UNUSED_FUNCTION
#  else // some other compiler
#    define TRILINOS_UNUSED_FUNCTION
#  endif
#endif // ! defined(TRILINOS_UNUSED_FUNCTION)


namespace Tpetra {
  namespace Details {

    namespace {
#ifdef HAVE_MPI
      // We're communicating integers of type IntType.  Figure
      // out the right MPI_Datatype for IntType.  Usually it
      // is int or long, so these are good enough for now.
      template<class IntType> TRILINOS_UNUSED_FUNCTION MPI_Datatype
      getMpiDatatype () {
        TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
          "Not implemented for IntType != int, long, or long long");
      }

      template<> TRILINOS_UNUSED_FUNCTION MPI_Datatype
      getMpiDatatype<int> () { return MPI_INT; }

      template<> TRILINOS_UNUSED_FUNCTION MPI_Datatype
      getMpiDatatype<long> () { return MPI_LONG; }

      template<> TRILINOS_UNUSED_FUNCTION MPI_Datatype
      getMpiDatatype<long long> () { return MPI_LONG_LONG; }
#endif // HAVE_MPI

      template<class IntType>
      void
      gather (const IntType sendbuf[], const int sendcnt,
              IntType recvbuf[], const int recvcnt,
              const int root, const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
      {
        using Teuchos::RCP;
        using Teuchos::rcp_dynamic_cast;
#ifdef HAVE_MPI
        using Teuchos::MpiComm;

        // Get the raw MPI communicator, so we don't have to wrap
        // MPI_Gather in Teuchos.
        RCP<const MpiComm<int> > mpiComm = rcp_dynamic_cast<const MpiComm<int> > (comm);
        if (! mpiComm.is_null ()) {
          MPI_Datatype sendtype = getMpiDatatype<IntType> ();
          MPI_Datatype recvtype = sendtype;
          MPI_Comm rawMpiComm = * (mpiComm->getRawMpiComm ());
          const int err = MPI_Gather (reinterpret_cast<void*> (const_cast<IntType*> (sendbuf)),
                                      sendcnt,
                                      sendtype,
                                      reinterpret_cast<void*> (recvbuf),
                                      recvcnt,
                                      recvtype,
                                      root,
                                      rawMpiComm);
          TEUCHOS_TEST_FOR_EXCEPTION(
            err != MPI_SUCCESS, std::runtime_error, "MPI_Gather failed");
          return;
        }
#endif // HAVE_MPI

        TEUCHOS_TEST_FOR_EXCEPTION(
          recvcnt > sendcnt, std::invalid_argument,
          "gather: If the input communicator contains only one process, "
          "then you cannot receive more entries than you send. "
          "You aim to receive " << recvcnt << " entries, "
          "but to send " << sendcnt << ".");
        // Serial communicator case: just copy.  recvcnt is the
        // amount to receive, so it's the amount to copy.
        std::copy (sendbuf, sendbuf + recvcnt, recvbuf);
      }

      template<class IntType>
      void
      gatherv (const IntType sendbuf[], const int sendcnt,
               IntType recvbuf[], const int recvcnts[], const int displs[],
               const int root, const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
      {
        using Teuchos::RCP;
        using Teuchos::rcp_dynamic_cast;
#ifdef HAVE_MPI
        using Teuchos::MpiComm;

        // Get the raw MPI communicator, so we don't have to wrap
        // MPI_Gatherv in Teuchos.
        RCP<const MpiComm<int> > mpiComm =
          rcp_dynamic_cast<const MpiComm<int> > (comm);
        if (! mpiComm.is_null ()) {
          MPI_Datatype sendtype = getMpiDatatype<IntType> ();
          MPI_Datatype recvtype = sendtype;
          MPI_Comm rawMpiComm = * (mpiComm->getRawMpiComm ());
          const int err = MPI_Gatherv (reinterpret_cast<void*> (const_cast<IntType*> (sendbuf)),
                                       sendcnt,
                                       sendtype,
                                       reinterpret_cast<void*> (recvbuf),
                                       const_cast<int*> (recvcnts),
                                       const_cast<int*> (displs),
                                       recvtype,
                                       root,
                                       rawMpiComm);
          TEUCHOS_TEST_FOR_EXCEPTION(
            err != MPI_SUCCESS, std::runtime_error, "MPI_Gatherv failed");
          return;
        }
#endif // HAVE_MPI
        TEUCHOS_TEST_FOR_EXCEPTION(
          recvcnts[0] > sendcnt,
          std::invalid_argument,
          "gatherv: If the input communicator contains only one process, "
          "then you cannot receive more entries than you send. "
          "You aim to receive " << recvcnts[0] << " entries, "
          "but to send " << sendcnt << ".");
        // Serial communicator case: just copy.  recvcnts[0] is the
        // amount to receive, so it's the amount to copy.  Start
        // writing to recvbuf at the offset displs[0].
        std::copy (sendbuf, sendbuf + recvcnts[0], recvbuf + displs[0]);
      }
    } // namespace (anonymous)


    // Given an arbitrary Map, compute a Map containing all the GIDs
    // in the same order as in (the one-to-one version of) map, but
    // all owned exclusively by Proc 0.
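    //
    // For example, with two processes where Proc 0 owns GIDs {0, 1, 2}
    // and Proc 1 owns GIDs {3, 4} (a hypothetical contiguous Map), the
    // returned gather Map contains GIDs {0, 1, 2, 3, 4}, all owned by
    // Proc 0; Proc 1 owns no entries of the gather Map.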
    template<class MapType>
    Teuchos::RCP<const MapType>
    computeGatherMap (Teuchos::RCP<const MapType> map,
                      const Teuchos::RCP<Teuchos::FancyOStream>& err,
                      const bool dbg=false)
    {
      using Tpetra::global_size_t;
      using Teuchos::Array;
      using Teuchos::ArrayRCP;
      using Teuchos::ArrayView;
      using Teuchos::as;
      using Teuchos::Comm;
      using Teuchos::RCP;
      using std::endl;
      typedef typename MapType::local_ordinal_type LO;
      typedef typename MapType::global_ordinal_type GO;
      typedef typename MapType::node_type NT;

      RCP<const Comm<int> > comm = map->getComm ();
      const int numProcs = comm->getSize ();
      const int myRank = comm->getRank ();

      if (! err.is_null ()) {
        err->pushTab ();
      }
      if (dbg) {
        *err << myRank << ": computeGatherMap:" << endl;
      }
      if (! err.is_null ()) {
        err->pushTab ();
      }

      RCP<const MapType> oneToOneMap;
      if (map->isContiguous ()) {
        oneToOneMap = map; // contiguous Maps are always 1-to-1
      } else {
        if (dbg) {
          *err << myRank << ": computeGatherMap: Calling createOneToOne" << endl;
        }
        // The Map might already be one-to-one, but the class doesn't
        // give us a way to test this, other than to create the
        // one-to-one Map.
        oneToOneMap = createOneToOne<LO, GO, NT> (map);
      }

      RCP<const MapType> gatherMap;
      if (numProcs == 1) {
        gatherMap = oneToOneMap;
      } else {
        if (dbg) {
          *err << myRank << ": computeGatherMap: Gathering Map counts" << endl;
        }
        // Gather each process' count of Map elements to Proc 0,
        // into the recvCounts array.  This will tell Proc 0 how
        // many GIDs to expect from each process when calling
        // MPI_Gatherv.  Counts and offsets are all int, because
        // that's what MPI uses.  Teuchos::as will at least prevent
        // bad casts to int in a debug build.
        //
        // This is not memory scalable: we bring ALL the data to
        // Proc 0, not just the receive counts, and this first
        // MPI_Gather is only the beginning.  To make it memory
        // scalable, the right thing to do (after the Export to the
        // one-to-one Map) would be to go round robin through the
        // processes, having each send a chunk of data (with its
        // GIDs, to get the order of rows right) at a time, and
        // overlapping writing to the file (resp. reading from it)
        // with receiving (resp. sending) the next chunk.
        const int myEltCount = as<int> (oneToOneMap->getNodeNumElements ());
        Array<int> recvCounts (numProcs);
        const int rootProc = 0;
        gather<int> (&myEltCount, 1, recvCounts.getRawPtr (), 1, rootProc, comm);

        ArrayView<const GO> myGlobalElts = oneToOneMap->getNodeElementList ();
        const int numMyGlobalElts = as<int> (myGlobalElts.size ());
        // Only Proc 0 needs to receive and store all the GIDs (from
        // all processes).
        ArrayRCP<GO> allGlobalElts;
        if (myRank == 0) {
          allGlobalElts = arcp<GO> (oneToOneMap->getGlobalNumElements ());
          std::fill (allGlobalElts.begin (), allGlobalElts.end (), 0);
        }

        if (dbg) {
          *err << myRank << ": computeGatherMap: Computing MPI_Gatherv "
            "displacements" << endl;
        }
        // Displacements for gatherv() in this case (where we are
        // gathering into a contiguous array) are an exclusive
        // partial sum (first entry is zero, second starts the
        // partial sum) of recvCounts.
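        // For example, recvCounts = {3, 2, 4} yields displs = {0, 3, 5}.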
        Array<int> displs (numProcs, 0);
        std::partial_sum (recvCounts.begin (), recvCounts.end () - 1,
                          displs.begin () + 1);
        if (dbg) {
          *err << myRank << ": computeGatherMap: Calling MPI_Gatherv" << endl;
        }
        gatherv<GO> (myGlobalElts.getRawPtr (), numMyGlobalElts,
                     allGlobalElts.getRawPtr (), recvCounts.getRawPtr (),
                     displs.getRawPtr (), rootProc, comm);

        if (dbg) {
          *err << myRank << ": computeGatherMap: Creating gather Map" << endl;
        }
        // Create a Map with all the GIDs, in the same order as in
        // the one-to-one Map, but owned by Proc 0.
        ArrayView<const GO> allElts (NULL, 0);
        if (myRank == 0) {
          allElts = allGlobalElts ();
        }
        const global_size_t INVALID = Teuchos::OrdinalTraits<global_size_t>::invalid ();
        gatherMap = rcp (new MapType (INVALID, allElts,
                                      oneToOneMap->getIndexBase (),
                                      comm, map->getNode ()));
      }
      if (! err.is_null ()) {
        err->popTab ();
      }
      if (dbg) {
        *err << myRank << ": computeGatherMap: done" << endl;
      }
      if (! err.is_null ()) {
        err->popTab ();
      }
      return gatherMap;
    }

  } // namespace Details
} // namespace Tpetra

#endif // __Tpetra_ComputeGatherMap_hpp
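
A minimal usage sketch follows; it is not part of the header above. It assumes an MPI-enabled Trilinos build of the same vintage as this file (one whose Tpetra::Map still provides getNode()), and the variable names (srcMap, gatherMap) and the 100-entry contiguous Map are purely illustrative.

#include "Tpetra_ComputeGatherMap.hpp"
#include "Teuchos_DefaultComm.hpp"
#include "Teuchos_GlobalMPISession.hpp"

int main (int argc, char* argv[])
{
  // Initialize MPI (if enabled) for the duration of main().
  Teuchos::GlobalMPISession mpiSession (&argc, &argv);
  {
    typedef Tpetra::Map<> map_type; // default LO, GO, and Node types

    Teuchos::RCP<const Teuchos::Comm<int> > comm =
      Teuchos::DefaultComm<int>::getComm ();

    // A contiguous, uniformly distributed Map with 100 global
    // entries and index base 0 (illustrative values).
    Teuchos::RCP<const map_type> srcMap =
      Teuchos::rcp (new map_type (100, 0, comm));

    // Same GIDs in the same order, but all owned by Proc 0.
    // Passing Teuchos::null for the output stream is fine, since
    // debug output is off by default.
    Teuchos::RCP<const map_type> gatherMap =
      Tpetra::Details::computeGatherMap (srcMap, Teuchos::null);
    (void) gatherMap; // silence unused-variable warnings in this sketch

    // A typical next step (not shown) is to build an Export from
    // srcMap to gatherMap and use it to bring a distributed object
    // (e.g., a Vector or CrsMatrix) to Proc 0, as Tpetra's
    // MatrixMarket I/O routines do.
  }
  return 0;
}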