|
Tpetra Matrix/Vector Services Version of the Day
|
00001 // @HEADER 00002 // *********************************************************************** 00003 // 00004 // Tpetra: Templated Linear Algebra Services Package 00005 // Copyright (2008) Sandia Corporation 00006 // 00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive 00008 // license for use of this work by or on behalf of the U.S. Government. 00009 // 00010 // This library is free software; you can redistribute it and/or modify 00011 // it under the terms of the GNU Lesser General Public License as 00012 // published by the Free Software Foundation; either version 2.1 of the 00013 // License, or (at your option) any later version. 00014 // 00015 // This library is distributed in the hope that it will be useful, but 00016 // WITHOUT ANY WARRANTY; without even the implied warranty of 00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00018 // Lesser General Public License for more details. 00019 // 00020 // You should have received a copy of the GNU Lesser General Public 00021 // License along with this library; if not, write to the Free Software 00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 00023 // USA 00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00025 // 00026 // *********************************************************************** 00027 // @HEADER 00028 00029 #include "Tpetra_Distributor.hpp" 00030 00031 namespace Tpetra { 00032 00033 Distributor::Distributor(const Teuchos::RCP<const Teuchos::Comm<int> > &comm) 00034 : comm_(comm) 00035 , numExports_(0) 00036 , selfMessage_(false) 00037 , numSends_(0) 00038 , maxSendLength_(0) 00039 , numReceives_(0) 00040 , totalReceiveLength_(0) 00041 {} 00042 00043 Distributor::Distributor(const Distributor & distributor) 00044 : comm_(distributor.comm_) 00045 , numExports_(distributor.numExports_) 00046 , selfMessage_(distributor.selfMessage_) 00047 , numSends_(distributor.numSends_) 00048 , maxSendLength_(distributor.maxSendLength_) 00049 , numReceives_(distributor.numReceives_) 00050 , totalReceiveLength_(distributor.totalReceiveLength_) 00051 , reverseDistributor_(distributor.reverseDistributor_) 00052 {} 00053 00054 Distributor::~Distributor() 00055 { 00056 // we shouldn't have any outstanding requests at this point; verify 00057 TEST_FOR_EXCEPTION(requests_.size() != 0, std::runtime_error, 00058 Teuchos::typeName(*this) << "::Distributor~(): Destructor called with outstanding posts."); 00059 } 00060 00061 size_t Distributor::getTotalReceiveLength() const 00062 { return totalReceiveLength_; } 00063 00064 size_t Distributor::getNumReceives() const 00065 { return numReceives_; } 00066 00067 bool Distributor::hasSelfMessage() const 00068 { return selfMessage_; } 00069 00070 size_t Distributor::getNumSends() const 00071 { return numSends_; } 00072 00073 size_t Distributor::getMaxSendLength() const 00074 { return maxSendLength_; } 00075 00076 Teuchos::ArrayView<const int> Distributor::getImagesFrom() const 00077 { return imagesFrom_; } 00078 00079 Teuchos::ArrayView<const size_t> Distributor::getLengthsFrom() const 00080 { return lengthsFrom_; } 00081 00082 Teuchos::ArrayView<const int> Distributor::getImagesTo() const 00083 { return imagesTo_; } 00084 00085 Teuchos::ArrayView<const size_t> Distributor::getLengthsTo() const 00086 { return lengthsTo_; } 00087 00088 const Teuchos::RCP<Distributor> & 00089 Distributor::getReverse() const { 00090 if (reverseDistributor_ == Teuchos::null) { 00091 // need to create reverse distributor 00092 createReverseDistributor(); 00093 } 00094 return reverseDistributor_; 00095 } 00096 00097 00098 void Distributor::createReverseDistributor() const { 00099 00100 reverseDistributor_ = Teuchos::rcp(new Distributor(comm_)); 00101 00102 // compute new totalSendLength 00103 size_t totalSendLength = std::accumulate(lengthsTo_.begin(),lengthsTo_.end(),0); 00104 00105 // compute new maxReceiveLength 00106 size_t maxReceiveLength = 0; 00107 const int myImageID = comm_->getRank(); 00108 for (size_t i=0; i < numReceives_; ++i) { 00109 if (imagesFrom_[i] != myImageID) { 00110 if (lengthsFrom_[i] > maxReceiveLength) { 00111 maxReceiveLength = lengthsFrom_[i]; 00112 } 00113 } 00114 } 00115 00116 // initialize all of reverseDistributor's data members 00117 reverseDistributor_->lengthsTo_ = lengthsFrom_; 00118 reverseDistributor_->imagesTo_ = imagesFrom_; 00119 reverseDistributor_->indicesTo_ = indicesFrom_; 00120 reverseDistributor_->startsTo_ = startsFrom_; 00121 reverseDistributor_->lengthsFrom_ = lengthsTo_; 00122 reverseDistributor_->imagesFrom_ = imagesTo_; 00123 reverseDistributor_->indicesFrom_ = indicesTo_; 00124 reverseDistributor_->startsFrom_ = startsTo_; 00125 reverseDistributor_->numSends_ = numReceives_; 00126 reverseDistributor_->numReceives_ = numSends_; 00127 reverseDistributor_->selfMessage_ = selfMessage_; 00128 reverseDistributor_->maxSendLength_ = maxReceiveLength; 00129 reverseDistributor_->totalReceiveLength_ = totalSendLength; 00130 // Note: technically, I am my reverse distributor's reverse distributor, but 00131 // we will not set this up, as it gives us an opportunity to test 00132 // that reverseDistributor is an inverse operation w.r.t. value semantics of distributors 00133 // Note: numExports_ was not copied 00134 } 00135 00136 00137 void Distributor::doWaits() { 00138 if (getNumReceives() > 0) { 00139 Teuchos::waitAll(*comm_,requests_()); 00140 // Requests should all be null, clear them 00141 #ifdef HAVE_TEUCHOS_DEBUG 00142 for (Teuchos::Array<Teuchos::RCP<Teuchos::CommRequest> >::const_iterator i = requests_.begin(); 00143 i != requests_.end(); ++i) 00144 { 00145 TEST_FOR_EXCEPTION(*i != Teuchos::null, std::runtime_error, 00146 Teuchos::typeName(*this) << "::doWaits(): Requests should be null after call to Teuchos::waitAll()."); 00147 } 00148 #endif 00149 requests_.clear(); 00150 } 00151 } 00152 00153 00154 void Distributor::doReverseWaits() 00155 { 00156 // call doWaits() on the reverse Distributor, if it exists 00157 if (reverseDistributor_ != Teuchos::null) { 00158 reverseDistributor_->doWaits(); 00159 } 00160 } 00161 00162 std::string Distributor::description() const 00163 { 00164 std::ostringstream oss; 00165 oss << Teuchos::Describable::description(); 00166 return oss.str(); 00167 } 00168 00169 void Distributor::describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel) const 00170 { 00171 using std::endl; 00172 using std::setw; 00173 using Teuchos::VERB_DEFAULT; 00174 using Teuchos::VERB_NONE; 00175 using Teuchos::VERB_LOW; 00176 using Teuchos::VERB_MEDIUM; 00177 using Teuchos::VERB_HIGH; 00178 using Teuchos::VERB_EXTREME; 00179 Teuchos::EVerbosityLevel vl = verbLevel; 00180 if (vl == VERB_DEFAULT) vl = VERB_LOW; 00181 const int myImageID = comm_->getRank(); 00182 const int numImages = comm_->getSize(); 00183 Teuchos::OSTab tab(out); 00184 if (vl != VERB_NONE) { 00185 // VERB_LOW and higher prints description() 00186 if (myImageID == 0) out << this->description() << std::endl; 00187 for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) { 00188 if (myImageID == imageCtr) { 00189 if (vl != VERB_LOW) { 00190 out << "[Node " << myImageID << " of " << numImages << "]" << endl; 00191 out << " selfMessage: " << hasSelfMessage() << endl; 00192 out << " numSends: " << getNumSends() << endl; 00193 if (vl == VERB_HIGH || vl == VERB_EXTREME) { 00194 out << " imagesTo: " << toString(imagesTo_) << endl; 00195 out << " lengthsTo: " << toString(lengthsTo_) << endl; 00196 out << " maxSendLength: " << getMaxSendLength() << endl; 00197 } 00198 if (vl == VERB_EXTREME) { 00199 out << " startsTo: " << toString(startsTo_) << endl; 00200 out << " indicesTo: " << toString(indicesTo_) << endl; 00201 } 00202 if (vl == VERB_HIGH || vl == VERB_EXTREME) { 00203 out << " numReceives: " << getNumReceives() << endl; 00204 out << " totalReceiveLength: " << getTotalReceiveLength() << endl; 00205 out << " lengthsFrom: " << toString(lengthsFrom_) << endl; 00206 out << " imagesFrom: " << toString(imagesFrom_) << endl; 00207 } 00208 } 00209 } 00210 } 00211 } 00212 } 00213 00214 00216 void Distributor::computeReceives() 00217 { 00218 int myImageID = comm_->getRank(); 00219 int numImages = comm_->getSize(); 00220 00221 // to_nodes_from_me[i] == number of messages sent by this node to node i 00222 // the info in numSends_, imagesTo_, lengthsTo_ concerns the contiguous sends 00223 // therefore, each node will be listed in imagesTo_ at most once 00224 { 00225 Teuchos::Array<size_t> to_nodes_from_me(numImages,0); 00226 # ifdef HAVE_TEUCHOS_DEBUG 00227 bool counting_error = false; 00228 # endif 00229 for (size_t i=0; i < (numSends_ + (selfMessage_ ? 1 : 0)); ++i) { 00230 # ifdef HAVE_TEUCHOS_DEBUG 00231 if (to_nodes_from_me[imagesTo_[i]] != 0) counting_error = true; 00232 # endif 00233 to_nodes_from_me[imagesTo_[i]] = 1; 00234 } 00235 # ifdef HAVE_TEUCHOS_DEBUG 00236 SHARED_TEST_FOR_EXCEPTION(counting_error, std::logic_error, 00237 "Tpetra::Distributor::createFromSends: logic error. Please notify the Tpetra team.",*comm_); 00238 # endif 00239 // each proc will get back only one item (hence, counts = ones) from the array of globals sums, 00240 // namely that entry corresponding to the node, and detailing how many receives it has. 00241 // this total includes self sends 00242 Teuchos::Array<int> counts(numImages, 1); 00243 Teuchos::reduceAllAndScatter<int,size_t>(*comm_,Teuchos::REDUCE_SUM,numImages,&to_nodes_from_me[0],&counts[0],&numReceives_); 00244 } 00245 00246 // assign these to length numReceives, with zero entries 00247 lengthsFrom_.assign(numReceives_, 0); 00248 imagesFrom_.assign(numReceives_, 0); 00249 00250 // FINISH: why do these work? they are blocking sends, and should block until completion, which happens below 00251 // FINISH: consider switching them to non-blocking 00252 // NOTE: epetra has both, old (non-blocking) and new (mysterious) 00253 00254 for (size_t i=0; i < numSends_ + (selfMessage_ ? 1 : 0); ++i) { 00255 if (imagesTo_[i] != myImageID ) { 00256 // send a message to imagesTo_[i], telling him that our pattern sends him lengthsTo_[i] blocks of packets 00257 Teuchos::send(*comm_,lengthsTo_[i],imagesTo_[i]); 00258 } 00259 else { 00260 // set selfMessage_ to end block of recv arrays 00261 lengthsFrom_[numReceives_-1] = lengthsTo_[i]; 00262 imagesFrom_[numReceives_-1] = myImageID; 00263 } 00264 } 00265 00266 // 00267 for (size_t i=0; i < numReceives_ - (selfMessage_ ? 1 : 0); ++i) { 00268 // receive one variable from any sender. 00269 // store the value in lengthsFrom_[i], and store the sender's ImageID in imagesFrom_[i] 00270 // imagesFrom_[i] = comm_->receive(&lengthsFrom_[i], 1, -1); 00271 imagesFrom_[i] = Teuchos::receive(*comm_,-1,&lengthsFrom_[i]); 00272 } 00273 comm_->barrier(); 00274 00275 sort2(imagesFrom_.begin(), imagesFrom_.end(), lengthsFrom_.begin()); 00276 00277 // Compute indicesFrom_ 00278 totalReceiveLength_ = std::accumulate(lengthsFrom_.begin(), lengthsFrom_.end(), 0); 00279 indicesFrom_.clear(); 00280 indicesFrom_.reserve(totalReceiveLength_); 00281 for (size_t i=0; i < totalReceiveLength_; ++i) { 00282 indicesFrom_.push_back(i); 00283 } 00284 00285 startsFrom_.clear(); 00286 startsFrom_.reserve(numReceives_); 00287 for (size_t i=0, j = 0; i < numReceives_; ++i) { 00288 startsFrom_.push_back(j); 00289 j += lengthsFrom_[i]; 00290 } 00291 00292 if (selfMessage_) --numReceives_; 00293 00294 comm_->barrier(); 00295 } 00296 00297 00299 size_t Distributor::createFromSends(const Teuchos::ArrayView<const int> &exportNodeIDs) { 00300 using Teuchos::outArg; 00301 numExports_ = exportNodeIDs.size(); 00302 00303 const int myImageID = comm_->getRank(); 00304 const int numImages = comm_->getSize(); 00305 00306 // exportNodeIDs tells us the communication pattern for this distributor 00307 // it dictates the way that the export data will be interpretted in doPosts() 00308 // we want to perform at most one communication per node; this is for two 00309 // reasons: 00310 // * minimize latency/overhead in the comm routines (nice) 00311 // * match the number of receives and sends between nodes (necessary) 00312 // Teuchos::Comm requires that the data for a send is contiguous in a send 00313 // buffer. 00314 // Therefore, if the data in the send buffer for doPosts() is not 00315 // contiguous, it will need to be copied into a contiguous buffer. 00316 // 00317 // The user has specified this pattern and we can't do anything about it. 00318 // 00319 // However, if they do not provide an efficient pattern, we will warn them 00320 // if one of 00321 // HAVE_TPETRA_THROW_EFFICIENCY_WARNINGS 00322 // HAVE_TPETRA_PRINT_EFFICIENCY_WARNINGS 00323 // is on. 00324 // 00325 // If the data is contiguous, then we can post the sends in situ. 00326 // 00327 // Determine contiguity. There are a number of ways to do this: 00328 // * if the export IDs are sorted, then all exports to a particular 00329 // node must be contiguous. This is how Epetra does it. 00330 // * if the export ID of the current export already has been listed, 00331 // then the previous listing should correspond to the same export. 00332 // This tests contiguity, but not sortedness. 00333 // Both of these tests require O(n), where n is the number of 00334 // exports. However, the latter will positively identify a greater 00335 // portion of contiguous patterns. We will use the latter method. 00336 // 00337 // Check to see if values are grouped by images without gaps 00338 // If so, indices_to -> 0. 00339 00340 // Setup data structures for quick traversal of arrays. 00341 // This contains the number of sends for each image id. 00342 Teuchos::Array<size_t> starts(numImages + 1, 0); 00343 00344 // numActive is the number of sends that are not Null 00345 size_t numActive = 0; 00346 char needSendBuff = 0; 00347 00348 int badID = -1; 00349 for (size_t i = 0; i < numExports_; ++i) { 00350 int exportID = exportNodeIDs[i]; 00351 if (exportID >= numImages) { 00352 badID = myImageID; 00353 break; 00354 } 00355 else if (exportID >= 0) { 00356 // increment starts[exportID] 00357 ++starts[exportID]; 00358 // if after incrementing it is greater than one, check that the 00359 // previous export went to this node 00360 // this is a safe comparison, because starts[exportID] > 1 00361 // implies that i > 1. 00362 // null entries break continuity. 00363 // e.g., [ 0, 0, 0, 1, -99, 1, 2, 2, 2] is not contiguous 00364 if (needSendBuff==0 && starts[exportID] > 1 && exportID != exportNodeIDs[i-1]) { 00365 needSendBuff = 1; 00366 } 00367 ++numActive; 00368 } 00369 } 00370 { 00371 int gbl_badID; 00372 Teuchos::reduceAll(*comm_,Teuchos::REDUCE_MAX,badID,outArg(gbl_badID)); 00373 TEST_FOR_EXCEPTION(gbl_badID >= 0, std::runtime_error, 00374 Teuchos::typeName(*this) << "::createFromSends(): bad node id listed on node " << gbl_badID << "."); 00375 } 00376 00377 # if defined(HAVE_TPETRA_THROW_EFFICIENCY_WARNINGS) || defined(HAVE_TPETRA_PRINT_EFFICIENCY_WARNINGS) 00378 { 00379 char global_needSendBuff; 00380 Teuchos::reduceAll(*comm_,Teuchos::REDUCE_MAX,needSendBuff,&global_needSendBuff); 00381 TPETRA_EFFICIENCY_WARNING(global_needSendBuff,std::runtime_error, 00382 "::createFromSends(): Grouping export IDs together leads to improved performance."); 00383 } 00384 # endif 00385 00386 if (starts[myImageID] != 0) { 00387 selfMessage_ = true; 00388 } 00389 else { 00390 selfMessage_ = false; 00391 } 00392 00393 00394 #ifdef HAVE_TEUCHOS_DEBUG 00395 bool index_neq_numActive = false; 00396 bool send_neq_numSends = false; 00397 #endif 00398 if (!needSendBuff) { 00399 // grouped by image, no send buffer or indicesTo_ needed 00400 numSends_ = 0; 00401 // count total number of sends, i.e., total number of images that we are sending to 00402 // this includes myself 00403 for (int i=0; i < numImages; ++i) { 00404 if (starts[i]) ++numSends_; 00405 } 00406 00407 // not only do we not need these, but we must clear it, as empty status of indicesTo 00408 // is a flag used later 00409 indicesTo_.resize(0); 00410 // size these to numSends_; note, at the moment, numSends_ includes self sends 00411 // set their values to zeros 00412 imagesTo_.assign(numSends_,0); 00413 startsTo_.assign(numSends_,0); 00414 lengthsTo_.assign(numSends_,0); 00415 00416 // set startsTo to the offsent for each send (i.e., each image ID) 00417 // set imagesTo to the image ID for each send 00418 // in interpretting this code, remember that we are assuming contiguity 00419 // that is why index skips through the ranks 00420 { 00421 size_t index = 0, nodeIndex = 0; 00422 for (size_t i = 0; i < numSends_; ++i) { 00423 while (exportNodeIDs[nodeIndex] < 0) { 00424 ++nodeIndex; // skip all negative node IDs 00425 } 00426 startsTo_[i] = nodeIndex; 00427 int imageID = exportNodeIDs[nodeIndex]; 00428 imagesTo_[i] = imageID; 00429 index += starts[imageID]; 00430 nodeIndex += starts[imageID]; 00431 } 00432 #ifdef HAVE_TEUCHOS_DEBUG 00433 if (index != numActive) { 00434 index_neq_numActive = true; 00435 } 00436 #endif 00437 } 00438 // sort the startsTo and image IDs together, in ascending order, according 00439 // to image IDs 00440 if (numSends_ > 0) { 00441 sort2(imagesTo_.begin(), imagesTo_.end(), startsTo_.begin()); 00442 } 00443 // compute the maximum send length 00444 maxSendLength_ = 0; 00445 for (size_t i = 0; i < numSends_; ++i) { 00446 int imageID = imagesTo_[i]; 00447 lengthsTo_[i] = starts[imageID]; 00448 if ((imageID != myImageID) && (lengthsTo_[i] > maxSendLength_)) { 00449 maxSendLength_ = lengthsTo_[i]; 00450 } 00451 } 00452 } 00453 else { 00454 // not grouped by image, need send buffer and indicesTo_ 00455 00456 // starts[i] is the number of sends to node i 00457 // numActive equals number of sends total, \sum_i starts[i] 00458 00459 // this loop starts at starts[1], so explicitly check starts[0] 00460 if (starts[0] == 0 ) { 00461 numSends_ = 0; 00462 } 00463 else { 00464 numSends_ = 1; 00465 } 00466 for (Teuchos::Array<size_t>::iterator i=starts.begin()+1, 00467 im1=starts.begin(); 00468 i != starts.end(); ++i) 00469 { 00470 if (*i != 0) ++numSends_; 00471 *i += *im1; 00472 im1 = i; 00473 } 00474 // starts[i] now contains the number of exports to nodes 0 through i 00475 00476 for (Teuchos::Array<size_t>::reverse_iterator ip1=starts.rbegin(), 00477 i=starts.rbegin()+1; 00478 i != starts.rend(); ++i) 00479 { 00480 *ip1 = *i; 00481 ip1 = i; 00482 } 00483 starts[0] = 0; 00484 // starts[i] now contains the number of exports to nodes 0 through 00485 // i-1, i.e., all nodes before node i 00486 00487 indicesTo_.resize(numActive); 00488 00489 for (size_t i = 0; i < numExports_; ++i) { 00490 if (exportNodeIDs[i] >= 0) { 00491 // record the offset to the sendBuffer for this export 00492 indicesTo_[starts[exportNodeIDs[i]]] = i; 00493 // now increment the offset for this node 00494 ++starts[exportNodeIDs[i]]; 00495 } 00496 } 00497 // our send buffer will contain the export data for each of the nodes 00498 // we communicate with, in order by node id 00499 // sendBuffer = {node_0_data, node_1_data, ..., node_np-1_data} 00500 // indicesTo now maps each export to the location in our send buffer 00501 // associated with the export 00502 // data for export i located at sendBuffer[indicesTo[i]] 00503 // 00504 // starts[i] once again contains the number of exports to 00505 // nodes 0 through i 00506 for (int node = numImages-1; node != 0; --node) { 00507 starts[node] = starts[node-1]; 00508 } 00509 starts.front() = 0; 00510 starts[numImages] = numActive; 00511 // 00512 // starts[node] once again contains the number of exports to 00513 // nodes 0 through node-1 00514 // i.e., the start of my data in the sendBuffer 00515 00516 // this contains invalid data at nodes we don't care about, that is okay 00517 imagesTo_.resize(numSends_); 00518 startsTo_.resize(numSends_); 00519 lengthsTo_.resize(numSends_); 00520 00521 // for each group of sends/exports, record the destination node, 00522 // the length, and the offset for this send into the 00523 // send buffer (startsTo_) 00524 maxSendLength_ = 0; 00525 size_t snd = 0; 00526 for (int node = 0; node < numImages; ++node ) { 00527 if (starts[node+1] != starts[node]) { 00528 lengthsTo_[snd] = starts[node+1] - starts[node]; 00529 startsTo_[snd] = starts[node]; 00530 // record max length for all off-node sends 00531 if ((node != myImageID) && (lengthsTo_[snd] > maxSendLength_)) { 00532 maxSendLength_ = lengthsTo_[snd]; 00533 } 00534 imagesTo_[snd] = node; 00535 ++snd; 00536 } 00537 } 00538 #ifdef HAVE_TEUCHOS_DEBUG 00539 if (snd != numSends_) { 00540 send_neq_numSends = true; 00541 } 00542 #endif 00543 } 00544 #ifdef HAVE_TEUCHOS_DEBUG 00545 SHARED_TEST_FOR_EXCEPTION(index_neq_numActive, std::logic_error, 00546 "Tpetra::Distributor::createFromSends: logic error. Please notify the Tpetra team.",*comm_); 00547 SHARED_TEST_FOR_EXCEPTION(send_neq_numSends, std::logic_error, 00548 "Tpetra::Distributor::createFromSends: logic error. Please notify the Tpetra team.",*comm_); 00549 #endif 00550 00551 if (selfMessage_) --numSends_; 00552 00553 // Invert map to see what msgs are received and what length 00554 computeReceives(); 00555 00556 return totalReceiveLength_; 00557 } 00558 00559 } // namespace Tpetra
1.7.4