41 #include "Tpetra_Distributor.hpp" 42 #include "Teuchos_StandardParameterEntryValidators.hpp" 43 #include "Teuchos_VerboseObjectParameterListHelpers.hpp" 51 if (sendType == DISTRIBUTOR_ISEND) {
54 else if (sendType == DISTRIBUTOR_RSEND) {
57 else if (sendType == DISTRIBUTOR_SEND) {
60 else if (sendType == DISTRIBUTOR_SSEND) {
64 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid " 65 "EDistributorSendType enum value " << sendType <<
".");
73 case Details::DISTRIBUTOR_NOT_INITIALIZED:
74 return "Not initialized yet";
75 case Details::DISTRIBUTOR_INITIALIZED_BY_CREATE_FROM_SENDS:
76 return "By createFromSends";
77 case Details::DISTRIBUTOR_INITIALIZED_BY_CREATE_FROM_RECVS:
78 return "By createFromRecvs";
79 case Details::DISTRIBUTOR_INITIALIZED_BY_REVERSE:
80 return "By createReverseDistributor";
81 case Details::DISTRIBUTOR_INITIALIZED_BY_COPY:
82 return "By copy constructor";
92 Array<std::string> sendTypes;
93 sendTypes.push_back (
"Isend");
94 sendTypes.push_back (
"Rsend");
95 sendTypes.push_back (
"Send");
96 sendTypes.push_back (
"Ssend");
// Default value of the "Debug" parameter.
const bool tpetraDistributorDebugDefault = false;
// Default value of the "Barrier between receives and sends" parameter.
const bool barrierBetween_default = false;
// Default value of the "Use distinct tags" parameter.
const bool useDistinctTags_default = true;
// Default value of the "Enable MPI CUDA RDMA support" parameter:
// enabled only when the corresponding build option is on.
#ifdef TPETRA_ENABLE_MPI_CUDA_RDMA
const bool enable_cuda_rdma_default = true;
#else
const bool enable_cuda_rdma_default = false;
#endif // TPETRA_ENABLE_MPI_CUDA_RDMA
119 int Distributor::getTag (
const int pathTag)
const {
120 return useDistinctTags_ ? pathTag : comm_->getTag ();
#ifdef TPETRA_DISTRIBUTOR_TIMERS
// Create the TimeMonitor timers used to profile the doPosts and
// doWaits communication paths.  Only compiled in when the
// TPETRA_DISTRIBUTOR_TIMERS build option is enabled.
void Distributor::makeTimers () {
  const std::string name_doPosts3 = "Tpetra::Distributor: doPosts(3)";
  const std::string name_doPosts4 = "Tpetra::Distributor: doPosts(4)";
  const std::string name_doWaits = "Tpetra::Distributor: doWaits";
  const std::string name_doPosts3_recvs = "Tpetra::Distributor: doPosts(3): recvs";
  const std::string name_doPosts4_recvs = "Tpetra::Distributor: doPosts(4): recvs";
  const std::string name_doPosts3_barrier = "Tpetra::Distributor: doPosts(3): barrier";
  const std::string name_doPosts4_barrier = "Tpetra::Distributor: doPosts(4): barrier";
  const std::string name_doPosts3_sends = "Tpetra::Distributor: doPosts(3): sends";
  const std::string name_doPosts4_sends = "Tpetra::Distributor: doPosts(4): sends";

  timer_doPosts3_ = Teuchos::TimeMonitor::getNewTimer (name_doPosts3);
  timer_doPosts4_ = Teuchos::TimeMonitor::getNewTimer (name_doPosts4);
  timer_doWaits_ = Teuchos::TimeMonitor::getNewTimer (name_doWaits);
  timer_doPosts3_recvs_ = Teuchos::TimeMonitor::getNewTimer (name_doPosts3_recvs);
  timer_doPosts4_recvs_ = Teuchos::TimeMonitor::getNewTimer (name_doPosts4_recvs);
  timer_doPosts3_barrier_ = Teuchos::TimeMonitor::getNewTimer (name_doPosts3_barrier);
  timer_doPosts4_barrier_ = Teuchos::TimeMonitor::getNewTimer (name_doPosts4_barrier);
  timer_doPosts3_sends_ = Teuchos::TimeMonitor::getNewTimer (name_doPosts3_sends);
  timer_doPosts4_sends_ = Teuchos::TimeMonitor::getNewTimer (name_doPosts4_sends);
}
#endif // TPETRA_DISTRIBUTOR_TIMERS
146 #endif // TPETRA_DISTRIBUTOR_TIMERS 149 Distributor::init (
const Teuchos::RCP<
const Teuchos::Comm<int> >& comm,
150 const Teuchos::RCP<Teuchos::FancyOStream>& out,
151 const Teuchos::RCP<Teuchos::ParameterList>& plist)
153 this->out_ = out.is_null () ?
154 Teuchos::getFancyOStream (Teuchos::rcpFromRef (std::cerr)) : out;
155 if (! plist.is_null ()) {
156 this->setParameterList (plist);
159 #ifdef TPETRA_DISTRIBUTOR_TIMERS 161 #endif // TPETRA_DISTRIBUTOR_TIMERS 164 TEUCHOS_TEST_FOR_EXCEPTION
165 (out_.is_null (), std::logic_error,
"Tpetra::Distributor::init: debug_ " 166 "is true but out_ (pointer to the output stream) is NULL. Please " 167 "report this bug to the Tpetra developers.");
168 Teuchos::OSTab tab (out_);
169 std::ostringstream os;
170 os << comm_->getRank ()
171 <<
": Distributor ctor done" << std::endl;
178 , howInitialized_ (
Details::DISTRIBUTOR_NOT_INITIALIZED)
179 , sendType_ (
Details::DISTRIBUTOR_SEND)
180 , barrierBetween_ (barrierBetween_default)
181 , debug_ (tpetraDistributorDebugDefault)
182 , enable_cuda_rdma_ (enable_cuda_rdma_default)
184 , selfMessage_ (false)
188 , totalReceiveLength_ (0)
189 , lastRoundBytesSend_ (0)
190 , lastRoundBytesRecv_ (0)
191 , useDistinctTags_ (useDistinctTags_default)
193 init (comm, Teuchos::null, Teuchos::null);
197 const Teuchos::RCP<Teuchos::FancyOStream>& out)
199 , howInitialized_ (
Details::DISTRIBUTOR_NOT_INITIALIZED)
200 , sendType_ (
Details::DISTRIBUTOR_SEND)
201 , barrierBetween_ (barrierBetween_default)
202 , debug_ (tpetraDistributorDebugDefault)
203 , enable_cuda_rdma_ (enable_cuda_rdma_default)
205 , selfMessage_ (false)
209 , totalReceiveLength_ (0)
210 , lastRoundBytesSend_ (0)
211 , lastRoundBytesRecv_ (0)
212 , useDistinctTags_ (useDistinctTags_default)
214 init (comm, out, Teuchos::null);
218 const Teuchos::RCP<Teuchos::ParameterList>& plist)
220 , howInitialized_ (
Details::DISTRIBUTOR_NOT_INITIALIZED)
221 , sendType_ (
Details::DISTRIBUTOR_SEND)
222 , barrierBetween_ (barrierBetween_default)
223 , debug_ (tpetraDistributorDebugDefault)
224 , enable_cuda_rdma_ (enable_cuda_rdma_default)
226 , selfMessage_ (false)
230 , totalReceiveLength_ (0)
231 , lastRoundBytesSend_ (0)
232 , lastRoundBytesRecv_ (0)
233 , useDistinctTags_ (useDistinctTags_default)
235 init (comm, Teuchos::null, plist);
239 const Teuchos::RCP<Teuchos::FancyOStream>& out,
240 const Teuchos::RCP<Teuchos::ParameterList>& plist)
242 , howInitialized_ (
Details::DISTRIBUTOR_NOT_INITIALIZED)
243 , sendType_ (
Details::DISTRIBUTOR_SEND)
244 , barrierBetween_ (barrierBetween_default)
245 , debug_ (tpetraDistributorDebugDefault)
246 , enable_cuda_rdma_ (enable_cuda_rdma_default)
248 , selfMessage_ (false)
252 , totalReceiveLength_ (0)
253 , lastRoundBytesSend_ (0)
254 , lastRoundBytesRecv_ (0)
255 , useDistinctTags_ (useDistinctTags_default)
257 init (comm, out, plist);
261 : comm_ (distributor.comm_)
262 , out_ (distributor.out_)
263 , howInitialized_ (
Details::DISTRIBUTOR_INITIALIZED_BY_COPY)
264 , sendType_ (distributor.sendType_)
265 , barrierBetween_ (distributor.barrierBetween_)
266 , debug_ (distributor.debug_)
267 , enable_cuda_rdma_ (distributor.enable_cuda_rdma_)
268 , numExports_ (distributor.numExports_)
269 , selfMessage_ (distributor.selfMessage_)
270 , numSends_ (distributor.numSends_)
271 , imagesTo_ (distributor.imagesTo_)
272 , startsTo_ (distributor.startsTo_)
273 , lengthsTo_ (distributor.lengthsTo_)
274 , maxSendLength_ (distributor.maxSendLength_)
275 , indicesTo_ (distributor.indicesTo_)
276 , numReceives_ (distributor.numReceives_)
277 , totalReceiveLength_ (distributor.totalReceiveLength_)
278 , lengthsFrom_ (distributor.lengthsFrom_)
279 , imagesFrom_ (distributor.imagesFrom_)
280 , startsFrom_ (distributor.startsFrom_)
281 , indicesFrom_ (distributor.indicesFrom_)
282 , reverseDistributor_ (distributor.reverseDistributor_)
283 , lastRoundBytesSend_ (distributor.lastRoundBytesSend_)
284 , lastRoundBytesRecv_ (distributor.lastRoundBytesRecv_)
285 , useDistinctTags_ (distributor.useDistinctTags_)
287 using Teuchos::ParameterList;
288 using Teuchos::parameterList;
298 RCP<const ParameterList> rhsList = distributor.getParameterList ();
299 if (! rhsList.is_null ()) {
300 this->setMyParamList (parameterList (* rhsList));
303 #ifdef TPETRA_DISTRIBUTOR_TIMERS 305 #endif // TPETRA_DISTRIBUTOR_TIMERS 308 TEUCHOS_TEST_FOR_EXCEPTION
309 (out_.is_null (), std::logic_error,
"Tpetra::Distributor::init: debug_ " 310 "is true but out_ (pointer to the output stream) is NULL. Please " 311 "report this bug to the Tpetra developers.");
312 Teuchos::OSTab tab (out_);
313 std::ostringstream os;
314 os << comm_->getRank ()
315 <<
": Distributor copy ctor done" << std::endl;
321 using Teuchos::ParameterList;
322 using Teuchos::parameterList;
325 std::swap (comm_, rhs.comm_);
326 std::swap (out_, rhs.out_);
327 std::swap (howInitialized_, rhs.howInitialized_);
328 std::swap (sendType_, rhs.sendType_);
329 std::swap (barrierBetween_, rhs.barrierBetween_);
330 std::swap (debug_, rhs.debug_);
331 std::swap (enable_cuda_rdma_, rhs.enable_cuda_rdma_);
332 std::swap (numExports_, rhs.numExports_);
333 std::swap (selfMessage_, rhs.selfMessage_);
334 std::swap (numSends_, rhs.numSends_);
335 std::swap (imagesTo_, rhs.imagesTo_);
336 std::swap (startsTo_, rhs.startsTo_);
337 std::swap (lengthsTo_, rhs.lengthsTo_);
338 std::swap (maxSendLength_, rhs.maxSendLength_);
339 std::swap (indicesTo_, rhs.indicesTo_);
340 std::swap (numReceives_, rhs.numReceives_);
341 std::swap (totalReceiveLength_, rhs.totalReceiveLength_);
342 std::swap (lengthsFrom_, rhs.lengthsFrom_);
343 std::swap (imagesFrom_, rhs.imagesFrom_);
344 std::swap (startsFrom_, rhs.startsFrom_);
345 std::swap (indicesFrom_, rhs.indicesFrom_);
346 std::swap (reverseDistributor_, rhs.reverseDistributor_);
347 std::swap (lastRoundBytesSend_, rhs.lastRoundBytesSend_);
348 std::swap (lastRoundBytesRecv_, rhs.lastRoundBytesRecv_);
349 std::swap (useDistinctTags_, rhs.useDistinctTags_);
353 RCP<ParameterList> lhsList = this->getNonconstParameterList ();
354 RCP<ParameterList> rhsList = rhs.getNonconstParameterList ();
355 if (lhsList.getRawPtr () == rhsList.getRawPtr () && ! rhsList.is_null ()) {
356 rhsList = parameterList (*rhsList);
358 if (! rhsList.is_null ()) {
359 this->setMyParamList (rhsList);
361 if (! lhsList.is_null ()) {
362 rhs.setMyParamList (lhsList);
373 TEUCHOS_TEST_FOR_EXCEPTION(requests_.size() != 0, std::runtime_error,
374 "Tpetra::Distributor: Destructor called with " << requests_.size()
375 <<
" outstanding posts (unfulfilled communication requests). There " 376 "should be none at this point. Please report this bug to the Tpetra " 383 using Teuchos::FancyOStream;
384 using Teuchos::getIntegralValue;
385 using Teuchos::includesVerbLevel;
386 using Teuchos::OSTab;
387 using Teuchos::ParameterList;
388 using Teuchos::parameterList;
393 plist->validateParametersAndSetDefaults (*validParams);
395 const bool barrierBetween =
396 plist->get<
bool> (
"Barrier between receives and sends");
398 getIntegralValue<Details::EDistributorSendType> (*plist,
"Send type");
399 const bool useDistinctTags = plist->get<
bool> (
"Use distinct tags");
400 const bool debug = plist->get<
bool> (
"Debug");
401 const bool enable_cuda_rdma = plist->get<
bool> (
"Enable MPI CUDA RDMA support");
407 TEUCHOS_TEST_FOR_EXCEPTION(
408 ! barrierBetween && sendType == Details::DISTRIBUTOR_RSEND,
409 std::invalid_argument,
"Tpetra::Distributor::setParameterList: " << endl
410 <<
"You specified \"Send type\"=\"Rsend\", but turned off the barrier " 411 "between receives and sends." << endl <<
"This is invalid; you must " 412 "include the barrier if you use ready sends." << endl <<
"Ready sends " 413 "require that their corresponding receives have already been posted, " 414 "and the only way to guarantee that in general is with a barrier.");
417 sendType_ = sendType;
418 barrierBetween_ = barrierBetween;
419 useDistinctTags_ = useDistinctTags;
421 enable_cuda_rdma_ = enable_cuda_rdma;
425 this->setMyParamList (plist);
428 Teuchos::RCP<const Teuchos::ParameterList>
431 using Teuchos::Array;
432 using Teuchos::ParameterList;
433 using Teuchos::parameterList;
435 using Teuchos::setStringToIntegralParameter;
437 const bool barrierBetween = barrierBetween_default;
438 const bool useDistinctTags = useDistinctTags_default;
439 const bool debug = tpetraDistributorDebugDefault;
440 const bool enable_cuda_rdma = enable_cuda_rdma_default;
443 const std::string defaultSendType (
"Send");
444 Array<Details::EDistributorSendType> sendTypeEnums;
445 sendTypeEnums.push_back (Details::DISTRIBUTOR_ISEND);
446 sendTypeEnums.push_back (Details::DISTRIBUTOR_RSEND);
447 sendTypeEnums.push_back (Details::DISTRIBUTOR_SEND);
448 sendTypeEnums.push_back (Details::DISTRIBUTOR_SSEND);
450 RCP<ParameterList> plist = parameterList (
"Tpetra::Distributor");
451 plist->set (
"Barrier between receives and sends", barrierBetween,
452 "Whether to execute a barrier between receives and sends in do" 453 "[Reverse]Posts(). Required for correctness when \"Send type\"" 454 "=\"Rsend\", otherwise correct but not recommended.");
455 setStringToIntegralParameter<Details::EDistributorSendType> (
"Send type",
456 defaultSendType,
"When using MPI, the variant of send to use in " 457 "do[Reverse]Posts()", sendTypes(), sendTypeEnums(), plist.getRawPtr());
458 plist->set (
"Use distinct tags", useDistinctTags,
"Whether to use distinct " 459 "MPI message tags for different code paths.");
460 plist->set (
"Debug", debug,
"Whether to print copious debugging output on " 462 plist->set (
"Enable MPI CUDA RDMA support", enable_cuda_rdma,
463 "Whether to enable RDMA support for MPI communication between " 464 "CUDA GPUs. Only enable this if you know for sure your MPI " 465 "library supports it.");
473 Teuchos::setupVerboseObjectSublist (&*plist);
474 return Teuchos::rcp_const_cast<
const ParameterList> (plist);
479 {
return totalReceiveLength_; }
482 {
return numReceives_; }
485 {
return selfMessage_; }
488 {
return numSends_; }
491 {
return maxSendLength_; }
494 {
return imagesFrom_; }
497 {
return lengthsFrom_; }
500 {
return imagesTo_; }
503 {
return lengthsTo_; }
505 Teuchos::RCP<Distributor>
507 if (reverseDistributor_.is_null ()) {
508 createReverseDistributor ();
510 return reverseDistributor_;
515 Distributor::createReverseDistributor()
const 517 reverseDistributor_ = Teuchos::rcp (
new Distributor (comm_));
522 size_t totalSendLength =
523 std::accumulate (lengthsTo_.begin(), lengthsTo_.end(), 0);
528 size_t maxReceiveLength = 0;
529 const int myImageID = comm_->getRank();
530 for (
size_t i=0; i < numReceives_; ++i) {
531 if (imagesFrom_[i] != myImageID) {
533 if (lengthsFrom_[i] > maxReceiveLength) {
534 maxReceiveLength = lengthsFrom_[i];
542 reverseDistributor_->lengthsTo_ = lengthsFrom_;
543 reverseDistributor_->imagesTo_ = imagesFrom_;
544 reverseDistributor_->indicesTo_ = indicesFrom_;
545 reverseDistributor_->startsTo_ = startsFrom_;
546 reverseDistributor_->lengthsFrom_ = lengthsTo_;
547 reverseDistributor_->imagesFrom_ = imagesTo_;
548 reverseDistributor_->indicesFrom_ = indicesTo_;
549 reverseDistributor_->startsFrom_ = startsTo_;
550 reverseDistributor_->numSends_ = numReceives_;
551 reverseDistributor_->numReceives_ = numSends_;
552 reverseDistributor_->selfMessage_ = selfMessage_;
553 reverseDistributor_->maxSendLength_ = maxReceiveLength;
554 reverseDistributor_->totalReceiveLength_ = totalSendLength;
555 reverseDistributor_->howInitialized_ = Details::DISTRIBUTOR_INITIALIZED_BY_REVERSE;
565 using Teuchos::Array;
566 using Teuchos::CommRequest;
567 using Teuchos::FancyOStream;
568 using Teuchos::includesVerbLevel;
569 using Teuchos::is_null;
570 using Teuchos::OSTab;
572 using Teuchos::waitAll;
575 Teuchos::OSTab tab (out_);
577 #ifdef TPETRA_DISTRIBUTOR_TIMERS 578 Teuchos::TimeMonitor timeMon (*timer_doWaits_);
579 #endif // TPETRA_DISTRIBUTOR_TIMERS 581 const int myRank = comm_->getRank ();
584 std::ostringstream os;
585 os << myRank <<
": doWaits: # reqs = " 586 << requests_.size () << endl;
590 if (requests_.size() > 0) {
591 waitAll (*comm_, requests_());
593 #ifdef HAVE_TEUCHOS_DEBUG 595 for (Array<RCP<CommRequest<int> > >::const_iterator it = requests_.begin();
596 it != requests_.end(); ++it)
598 TEUCHOS_TEST_FOR_EXCEPTION( ! is_null (*it), std::runtime_error,
599 Teuchos::typeName(*
this) <<
"::doWaits(): Communication requests " 600 "should all be null aftr calling Teuchos::waitAll() on them, but " 601 "at least one request is not null.");
603 #endif // HAVE_TEUCHOS_DEBUG 606 requests_.resize (0);
609 #ifdef HAVE_TEUCHOS_DEBUG 611 const int localSizeNonzero = (requests_.size () != 0) ? 1 : 0;
612 int globalSizeNonzero = 0;
613 Teuchos::reduceAll<int, int> (*comm_, Teuchos::REDUCE_MAX,
615 Teuchos::outArg (globalSizeNonzero));
616 TEUCHOS_TEST_FOR_EXCEPTION(
617 globalSizeNonzero != 0, std::runtime_error,
618 "Tpetra::Distributor::doWaits: After waitAll, at least one process has " 619 "a nonzero number of outstanding posts. There should be none at this " 620 "point. Please report this bug to the Tpetra developers.");
622 #endif // HAVE_TEUCHOS_DEBUG 625 std::ostringstream os;
626 os << myRank <<
": doWaits done" << endl;
633 if (! reverseDistributor_.is_null()) {
634 reverseDistributor_->doWaits();
639 std::ostringstream out;
641 out <<
"\"Tpetra::Distributor\": {";
642 const std::string label = this->getObjectLabel ();
644 out <<
"Label: " << label <<
", ";
646 out <<
"How initialized: " 650 << DistributorSendTypeEnumToString (sendType_)
651 <<
", Barrier between receives and sends: " 652 << (barrierBetween_ ?
"true" :
"false")
653 <<
", Use distinct tags: " 654 << (useDistinctTags_ ?
"true" :
"false")
655 <<
", Debug: " << (debug_ ?
"true" :
"false")
656 <<
", Enable MPI CUDA RDMA support: " 657 << (enable_cuda_rdma_ ?
"true" :
"false")
664 const Teuchos::EVerbosityLevel verbLevel)
const 668 using Teuchos::VERB_DEFAULT;
669 using Teuchos::VERB_NONE;
670 using Teuchos::VERB_LOW;
671 using Teuchos::VERB_MEDIUM;
672 using Teuchos::VERB_HIGH;
673 using Teuchos::VERB_EXTREME;
674 Teuchos::EVerbosityLevel vl = verbLevel;
675 if (vl == VERB_DEFAULT) vl = VERB_LOW;
676 const int myImageID = comm_->getRank();
677 const int numImages = comm_->getSize();
678 Teuchos::OSTab tab (out);
680 if (vl == VERB_NONE) {
683 if (myImageID == 0) {
687 out <<
"\"Tpetra::Distributor\":" << endl;
688 Teuchos::OSTab tab2 (out);
689 const std::string label = this->getObjectLabel ();
691 out <<
"Label: " << label << endl;
693 out <<
"How initialized: " 695 << endl <<
"Parameters: " << endl;
697 Teuchos::OSTab tab3 (out);
698 out <<
"\"Send type\": " 699 << DistributorSendTypeEnumToString (sendType_) << endl
700 <<
"\"Barrier between receives and sends\": " 701 << (barrierBetween_ ?
"true" :
"false") << endl;
702 out <<
"\"Use distinct tags\": " 703 << (useDistinctTags_ ?
"true" :
"false") << endl;
704 out <<
"\"Debug\": " << (debug_ ?
"true" :
"false") << endl;
705 out <<
"\"Enable MPI CUDA RDMA support\": " <<
706 (enable_cuda_rdma_ ?
"true" :
"false") << endl;
709 if (vl == VERB_LOW) {
712 Teuchos::OSTab tab2 (out);
717 for (
int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
718 if (myImageID == imageCtr) {
719 if (myImageID == 0) {
720 out <<
"Number of processes: " << numImages << endl;
722 out <<
"Process: " << myImageID << endl;
723 Teuchos::OSTab tab3 (out);
726 if (vl == VERB_HIGH || vl == VERB_EXTREME) {
727 out <<
"imagesTo: " << toString (imagesTo_) << endl;
728 out <<
"lengthsTo: " << toString (lengthsTo_) << endl;
731 if (vl == VERB_EXTREME) {
732 out <<
"startsTo: " << toString (startsTo_) << endl;
733 out <<
"indicesTo: " << toString (indicesTo_) << endl;
735 if (vl == VERB_HIGH || vl == VERB_EXTREME) {
738 out <<
"lengthsFrom: " << toString (lengthsFrom_) << endl;
739 out <<
"startsFrom: " << toString (startsFrom_) << endl;
740 out <<
"imagesFrom: " << toString (imagesFrom_) << endl;
757 Distributor::computeReceives ()
759 using Teuchos::Array;
761 using Teuchos::CommStatus;
762 using Teuchos::CommRequest;
763 using Teuchos::ireceive;
766 using Teuchos::REDUCE_SUM;
767 using Teuchos::receive;
768 using Teuchos::reduce;
769 using Teuchos::scatter;
771 using Teuchos::waitAll;
774 Teuchos::OSTab tab (out_);
775 const int myRank = comm_->getRank();
776 const int numProcs = comm_->getSize();
779 const int pathTag = 2;
780 const int tag = this->getTag (pathTag);
783 std::ostringstream os;
784 os << myRank <<
": computeReceives: " 785 "{selfMessage_: " << (selfMessage_ ?
"true" :
"false")
786 <<
", tag: " << tag <<
"}" << endl;
796 Array<int> toNodesFromMe (numProcs, 0);
797 #ifdef HAVE_TEUCHOS_DEBUG 798 bool counting_error =
false;
799 #endif // HAVE_TEUCHOS_DEBUG 800 for (
size_t i = 0; i < (numSends_ + (selfMessage_ ? 1 : 0)); ++i) {
801 #ifdef HAVE_TEUCHOS_DEBUG 802 if (toNodesFromMe[imagesTo_[i]] != 0) {
803 counting_error =
true;
805 #endif // HAVE_TEUCHOS_DEBUG 806 toNodesFromMe[imagesTo_[i]] = 1;
808 #ifdef HAVE_TEUCHOS_DEBUG 810 "Tpetra::Distributor::computeReceives: There was an error on at least " 811 "one process in counting the number of messages send by that process to " 812 "the other processs. Please report this bug to the Tpetra developers.",
814 #endif // HAVE_TEUCHOS_DEBUG 817 std::ostringstream os;
818 os << myRank <<
": computeReceives: Calling reduce and scatter" << endl;
875 Array<int> numRecvsOnEachProc;
876 if (myRank == root) {
877 numRecvsOnEachProc.resize (numProcs);
879 int numReceivesAsInt = 0;
880 reduce<int, int> (toNodesFromMe.getRawPtr (),
881 numRecvsOnEachProc.getRawPtr (),
882 numProcs, REDUCE_SUM, root, *comm_);
883 scatter<int, int> (numRecvsOnEachProc.getRawPtr (), 1,
884 &numReceivesAsInt, 1, root, *comm_);
885 numReceives_ =
static_cast<size_t> (numReceivesAsInt);
891 lengthsFrom_.assign (numReceives_, 0);
892 imagesFrom_.assign (numReceives_, 0);
908 const size_t actualNumReceives = numReceives_ - (selfMessage_ ? 1 : 0);
914 Array<RCP<CommRequest<int> > > requests (actualNumReceives);
915 Array<ArrayRCP<size_t> > lengthsFromBuffers (actualNumReceives);
916 Array<RCP<CommStatus<int> > > statuses (actualNumReceives);
921 const int anySourceProc = MPI_ANY_SOURCE;
923 const int anySourceProc = -1;
927 std::ostringstream os;
928 os << myRank <<
": computeReceives: Posting " 929 << actualNumReceives <<
" irecvs" << endl;
934 for (
size_t i = 0; i < actualNumReceives; ++i) {
939 lengthsFromBuffers[i].resize (1);
940 lengthsFromBuffers[i][0] = as<size_t> (0);
941 requests[i] = ireceive<int, size_t> (lengthsFromBuffers[i], anySourceProc, tag, *comm_);
943 std::ostringstream os;
944 os << myRank <<
": computeReceives: " 945 "Posted any-proc irecv w/ specified tag " << tag << endl;
951 std::ostringstream os;
952 os << myRank <<
": computeReceives: " 953 "posting " << numSends_ <<
" sends" << endl;
964 for (
size_t i = 0; i < numSends_ + (selfMessage_ ? 1 : 0); ++i) {
965 if (imagesTo_[i] != myRank) {
969 const size_t*
const lengthsTo_i = &lengthsTo_[i];
970 send<int, size_t> (lengthsTo_i, 1, as<int> (imagesTo_[i]), tag, *comm_);
972 std::ostringstream os;
973 os << myRank <<
": computeReceives: " 974 "Posted send to Proc " << imagesTo_[i] <<
" w/ specified tag " 986 lengthsFrom_[numReceives_-1] = lengthsTo_[i];
987 imagesFrom_[numReceives_-1] = myRank;
992 std::ostringstream os;
993 os << myRank <<
": computeReceives: waitAll on " 994 << requests.size () <<
" requests" << endl;
1003 waitAll (*comm_, requests (), statuses ());
1004 for (
size_t i = 0; i < actualNumReceives; ++i) {
1005 lengthsFrom_[i] = *lengthsFromBuffers[i];
1006 imagesFrom_[i] = statuses[i]->getSourceRank ();
1012 sort2 (imagesFrom_.begin(), imagesFrom_.end(), lengthsFrom_.begin());
1015 totalReceiveLength_ = std::accumulate (lengthsFrom_.begin(), lengthsFrom_.end(), 0);
1016 indicesFrom_.clear ();
1017 indicesFrom_.reserve (totalReceiveLength_);
1018 for (
size_t i = 0; i < totalReceiveLength_; ++i) {
1019 indicesFrom_.push_back(i);
1022 startsFrom_.clear ();
1023 startsFrom_.reserve (numReceives_);
1024 for (
size_t i = 0, j = 0; i < numReceives_; ++i) {
1025 startsFrom_.push_back(j);
1026 j += lengthsFrom_[i];
1034 std::ostringstream os;
1035 os << myRank <<
": computeReceives: done" << endl;
1043 using Teuchos::outArg;
1044 using Teuchos::REDUCE_MAX;
1045 using Teuchos::reduceAll;
1048 Teuchos::OSTab tab (out_);
1050 numExports_ = exportNodeIDs.size();
1052 const int myImageID = comm_->getRank();
1053 const int numImages = comm_->getSize();
1055 std::ostringstream os;
1056 os << myImageID <<
": createFromSends" << endl;
1108 Teuchos::Array<size_t> starts (numImages + 1, 0);
1111 size_t numActive = 0;
1112 int needSendBuff = 0;
1114 #ifdef HAVE_TPETRA_DEBUG 1116 #endif // HAVE_TPETRA_DEBUG 1117 for (
size_t i = 0; i < numExports_; ++i) {
1118 const int exportID = exportNodeIDs[i];
1119 if (exportID >= numImages) {
1120 #ifdef HAVE_TPETRA_DEBUG 1122 #endif // HAVE_TPETRA_DEBUG 1125 else if (exportID >= 0) {
1139 if (needSendBuff==0 && starts[exportID] > 1 && exportID != exportNodeIDs[i-1]) {
1146 #ifdef HAVE_TPETRA_DEBUG 1153 reduceAll<int, int> (*comm_, REDUCE_MAX, badID, outArg (gbl_badID));
1154 TEUCHOS_TEST_FOR_EXCEPTION(gbl_badID >= 0, std::runtime_error,
1155 Teuchos::typeName(*
this) <<
"::createFromSends(): Process " << gbl_badID
1156 <<
", perhaps among other processes, got a bad send process ID.");
1171 #endif // HAVE_TPETRA_DEBUG 1173 #if defined(HAVE_TPETRA_THROW_EFFICIENCY_WARNINGS) || defined(HAVE_TPETRA_PRINT_EFFICIENCY_WARNINGS) 1175 int global_needSendBuff;
1176 reduceAll<int, int> (*comm_, REDUCE_MAX, needSendBuff,
1177 outArg (global_needSendBuff));
1179 global_needSendBuff != 0, std::runtime_error,
1180 "::createFromSends: Grouping export IDs together by process rank often " 1181 "improves performance.");
1187 if (starts[myImageID] != 0) {
1188 selfMessage_ =
true;
1191 selfMessage_ =
false;
1194 #ifdef HAVE_TEUCHOS_DEBUG 1195 bool index_neq_numActive =
false;
1196 bool send_neq_numSends =
false;
1198 if (! needSendBuff) {
1203 for (
int i = 0; i < numImages; ++i) {
1211 indicesTo_.resize(0);
1214 imagesTo_.assign(numSends_,0);
1215 startsTo_.assign(numSends_,0);
1216 lengthsTo_.assign(numSends_,0);
1223 size_t index = 0, nodeIndex = 0;
1224 for (
size_t i = 0; i < numSends_; ++i) {
1225 while (exportNodeIDs[nodeIndex] < 0) {
1228 startsTo_[i] = nodeIndex;
1229 int imageID = exportNodeIDs[nodeIndex];
1230 imagesTo_[i] = imageID;
1231 index += starts[imageID];
1232 nodeIndex += starts[imageID];
1234 #ifdef HAVE_TEUCHOS_DEBUG 1235 if (index != numActive) {
1236 index_neq_numActive =
true;
1242 if (numSends_ > 0) {
1243 sort2(imagesTo_.begin(), imagesTo_.end(), startsTo_.begin());
1247 for (
size_t i = 0; i < numSends_; ++i) {
1248 int imageID = imagesTo_[i];
1249 lengthsTo_[i] = starts[imageID];
1250 if ((imageID != myImageID) && (lengthsTo_[i] > maxSendLength_)) {
1251 maxSendLength_ = lengthsTo_[i];
1262 if (starts[0] == 0 ) {
1268 for (Teuchos::Array<size_t>::iterator i=starts.begin()+1,
1270 i != starts.end(); ++i)
1272 if (*i != 0) ++numSends_;
1278 for (Teuchos::Array<size_t>::reverse_iterator ip1=starts.rbegin(),
1279 i=starts.rbegin()+1;
1280 i != starts.rend(); ++i)
1289 indicesTo_.resize(numActive);
1291 for (
size_t i = 0; i < numExports_; ++i) {
1292 if (exportNodeIDs[i] >= 0) {
1294 indicesTo_[starts[exportNodeIDs[i]]] = i;
1296 ++starts[exportNodeIDs[i]];
1308 for (
int node = numImages-1; node != 0; --node) {
1309 starts[node] = starts[node-1];
1312 starts[numImages] = numActive;
1319 imagesTo_.resize(numSends_);
1320 startsTo_.resize(numSends_);
1321 lengthsTo_.resize(numSends_);
1328 for (
int node = 0; node < numImages; ++node ) {
1329 if (starts[node+1] != starts[node]) {
1330 lengthsTo_[snd] = starts[node+1] - starts[node];
1331 startsTo_[snd] = starts[node];
1333 if ((node != myImageID) && (lengthsTo_[snd] > maxSendLength_)) {
1334 maxSendLength_ = lengthsTo_[snd];
1336 imagesTo_[snd] = node;
1340 #ifdef HAVE_TEUCHOS_DEBUG 1341 if (snd != numSends_) {
1342 send_neq_numSends =
true;
1346 #ifdef HAVE_TEUCHOS_DEBUG 1348 "Tpetra::Distributor::createFromSends: logic error. Please notify the Tpetra team.",*comm_);
1350 "Tpetra::Distributor::createFromSends: logic error. Please notify the Tpetra team.",*comm_);
1353 if (selfMessage_) --numSends_;
1359 std::ostringstream os;
1360 os << myImageID <<
": createFromSends: done" << endl;
1366 howInitialized_ = Details::DISTRIBUTOR_INITIALIZED_BY_CREATE_FROM_SENDS;
1368 return totalReceiveLength_;
Namespace Tpetra contains the class and methods constituting the Tpetra library.
size_t getNumReceives() const
The number of processes from which we will receive data.
std::string description() const
A simple one-line description of this object.
ArrayView< const int > getImagesTo() const
Ranks of the processes to which this process will send values.
EDistributorHowInitialized
Enum indicating how and whether a Distributor was initialized.
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const
List of valid Distributor parameters.
void swap(Distributor &rhs)
Swap the contents of rhs with those of *this.
std::string DistributorSendTypeEnumToString(EDistributorSendType sendType)
Convert an EDistributorSendType enum value to a string.
ArrayView< const size_t > getLengthsFrom() const
Number of values this process will receive from each process.
Implementation details of Tpetra.
bool hasSelfMessage() const
Whether the calling process will send or receive messages to itself.
Sets up and executes a communication plan for a Tpetra DistObject.
size_t getTotalReceiveLength() const
Total number of values this process will receive from other processes.
void setParameterList(const Teuchos::RCP< Teuchos::ParameterList > &plist)
Set Distributor parameters.
size_t createFromSends(const ArrayView< const int > &exportNodeIDs)
Set up Distributor using list of process ranks to which this process will send.
#define TPETRA_EFFICIENCY_WARNING(throw_exception_test, Exception, msg)
Print or throw an efficiency warning.
ArrayView< const size_t > getLengthsTo() const
Number of values this process will send to each process.
virtual ~Distributor()
Destructor (virtual for memory safety).
void sort2(const IT1 &first1, const IT1 &last1, const IT2 &first2)
Sort the first array, and apply the resulting permutation to the second array.
std::string DistributorHowInitializedEnumToString(EDistributorHowInitialized how)
Convert an EDistributorHowInitialized enum value to a string.
ArrayView< const int > getImagesFrom() const
Ranks of the processes sending values to this process.
size_t getNumSends() const
The number of processes to which we will send data.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object with some verbosity level to an FancyOStream.
size_t getMaxSendLength() const
Maximum number of values this process will send to another single process.
#define SHARED_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg, comm)
Test for exception, with reduction over the given communicator.
Array< std::string > distributorSendTypes()
Valid values for Distributor's "Send type" parameter.
RCP< Distributor > getReverse() const
A reverse communication plan Distributor.
EDistributorSendType
The type of MPI send that Distributor should use.
Distributor(const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Construct using the specified communicator and default parameters.