|
Teuchos Package Browser (Single Doxygen Collection) Version of the Day
|
00001 // @HEADER 00002 // *********************************************************************** 00003 // 00004 // Teuchos: Common Tools Package 00005 // Copyright (2004) Sandia Corporation 00006 // 00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive 00008 // license for use of this work by or on behalf of the U.S. Government. 00009 // 00010 // This library is free software; you can redistribute it and/or modify 00011 // it under the terms of the GNU Lesser General Public License as 00012 // published by the Free Software Foundation; either version 2.1 of the 00013 // License, or (at your option) any later version. 00014 // 00015 // This library is distributed in the hope that it will be useful, but 00016 // WITHOUT ANY WARRANTY; without even the implied warranty of 00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00018 // Lesser General Public License for more details. 00019 // 00020 // You should have received a copy of the GNU Lesser General Public 00021 // License along with this library; if not, write to the Free Software 00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 00023 // USA 00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00025 // 00026 // *********************************************************************** 00027 // @HEADER 00028 00029 #ifndef TEUCHOS_MPICONTAINERCOMM_H 00030 #define TEUCHOS_MPICONTAINERCOMM_H 00031 00036 #include "Teuchos_ConfigDefs.hpp" 00037 #include "Teuchos_Array.hpp" 00038 #include "Teuchos_MPIComm.hpp" 00039 #include "Teuchos_MPITraits.hpp" 00040 00041 namespace Teuchos 00042 { 00049 template <class T> class MPIContainerComm 00050 { 00051 public: 00052 00054 static void bcast(T& x, int src, const MPIComm& comm); 00055 00057 static void bcast(Array<T>& x, int src, const MPIComm& comm); 00058 00060 static void bcast(Array<Array<T> >& x, 00061 int src, const MPIComm& comm); 00062 00064 static void allGather(const T& outgoing, 00065 Array<T>& incoming, 00066 const MPIComm& comm); 00067 00069 static void allToAll(const Array<T>& outgoing, 00070 Array<Array<T> >& incoming, 00071 const MPIComm& comm); 00072 00074 static void allToAll(const Array<Array<T> >& outgoing, 00075 Array<Array<T> >& incoming, 00076 const MPIComm& comm); 00077 00079 static void gatherv(const Array<T>& outgoing, 00080 Array<Array<T> >& incoming, 00081 int rootRank, 00082 const MPIComm& comm); 00083 00085 static void accumulate(const T& localValue, Array<T>& sums, T& total, 00086 const MPIComm& comm); 00087 00088 private: 00090 static void getBigArray(const Array<Array<T> >& x, 00091 Array<T>& bigArray, 00092 Array<int>& offsets); 00093 00095 static void getSmallArrays(const Array<T>& bigArray, 00096 const Array<int>& offsets, 00097 Array<Array<T> >& x); 00098 00099 00100 }; 00101 00102 00103 #ifndef DOXYGEN_SHOULD_SKIP_THIS 00104 00107 template <> class MPIContainerComm<std::string> 00108 { 00109 public: 00110 static void bcast(std::string& x, int src, const MPIComm& comm); 00111 00113 static void bcast(Array<std::string>& x, int src, const MPIComm& comm); 00114 00116 static void bcast(Array<Array<std::string> >& x, 00117 int src, const MPIComm& comm); 00118 00120 static void allGather(const std::string& outgoing, 00121 Array<std::string>& incoming, 00122 const MPIComm& comm); 00123 00125 static void gatherv(const Array<std::string>& outgoing, 00126 Array<Array<std::string> >& incoming, 00127 int rootRank, 00128 const MPIComm& comm); 00129 00137 static void pack(const Array<std::string>& x, 00138 Array<char>& packed); 00139 00142 static void unpack(const Array<char>& packed, 00143 Array<std::string>& x); 00144 private: 00146 static void getBigArray(const Array<std::string>& x, 00147 Array<char>& bigArray, 00148 Array<int>& offsets); 00149 00152 static void getStrings(const Array<char>& bigArray, 00153 const Array<int>& offsets, 00154 Array<std::string>& x); 00155 }; 00156 00157 #endif // DOXYGEN_SHOULD_SKIP_THIS 00158 00159 /* --------- generic functions for primitives ------------------- */ 00160 00161 template <class T> inline void MPIContainerComm<T>::bcast(T& x, int src, 00162 const MPIComm& comm) 00163 { 00164 comm.bcast((void*)&x, 1, MPITraits<T>::type(), src); 00165 } 00166 00167 00168 /* ----------- generic functions for arrays of primitives ----------- */ 00169 00170 template <class T> 00171 inline void MPIContainerComm<T>::bcast(Array<T>& x, int src, const MPIComm& comm) 00172 { 00173 int len = x.length(); 00174 MPIContainerComm<int>::bcast(len, src, comm); 00175 00176 if (comm.getRank() != src) 00177 { 00178 x.resize(len); 00179 } 00180 if (len==0) return; 00181 00182 /* then broadcast the contents */ 00183 comm.bcast((void*) &(x[0]), (int) len, 00184 MPITraits<T>::type(), src); 00185 } 00186 00187 00188 00189 /* ---------- generic function for arrays of arrays ----------- */ 00190 00191 template <class T> 00192 inline void MPIContainerComm<T>::bcast(Array<Array<T> >& x, int src, const MPIComm& comm) 00193 { 00194 Array<T> bigArray; 00195 Array<int> offsets; 00196 00197 if (src==comm.getRank()) 00198 { 00199 getBigArray(x, bigArray, offsets); 00200 } 00201 00202 bcast(bigArray, src, comm); 00203 MPIContainerComm<int>::bcast(offsets, src, comm); 00204 00205 if (src != comm.getRank()) 00206 { 00207 getSmallArrays(bigArray, offsets, x); 00208 } 00209 } 00210 00211 /* ---------- generic gather and scatter ------------------------ */ 00212 00213 template <class T> inline 00214 void MPIContainerComm<T>::allToAll(const Array<T>& outgoing, 00215 Array<Array<T> >& incoming, 00216 const MPIComm& comm) 00217 { 00218 int numProcs = comm.getNProc(); 00219 00220 // catch degenerate case 00221 if (numProcs==1) 00222 { 00223 incoming.resize(1); 00224 incoming[0] = outgoing; 00225 return; 00226 } 00227 00228 Array<T> sb(numProcs * outgoing.length()); 00229 Array<T> rb(numProcs * outgoing.length()); 00230 00231 T* sendBuf = new T[numProcs * outgoing.length()]; 00232 TEST_FOR_EXCEPTION(sendBuf==0, 00233 std::runtime_error, "Comm::allToAll failed to allocate sendBuf"); 00234 00235 T* recvBuf = new T[numProcs * outgoing.length()]; 00236 TEST_FOR_EXCEPTION(recvBuf==0, 00237 std::runtime_error, "Comm::allToAll failed to allocate recvBuf"); 00238 00239 int i; 00240 for (i=0; i<numProcs; i++) 00241 { 00242 for (int j=0; j<outgoing.length(); j++) 00243 { 00244 sendBuf[i*outgoing.length() + j] = outgoing[j]; 00245 } 00246 } 00247 00248 00249 00250 comm.allToAll(sendBuf, outgoing.length(), MPITraits<T>::type(), 00251 recvBuf, outgoing.length(), MPITraits<T>::type()); 00252 00253 incoming.resize(numProcs); 00254 00255 for (i=0; i<numProcs; i++) 00256 { 00257 incoming[i].resize(outgoing.length()); 00258 for (int j=0; j<outgoing.length(); j++) 00259 { 00260 incoming[i][j] = recvBuf[i*outgoing.length() + j]; 00261 } 00262 } 00263 00264 delete [] sendBuf; 00265 delete [] recvBuf; 00266 } 00267 00268 template <class T> inline 00269 void MPIContainerComm<T>::allToAll(const Array<Array<T> >& outgoing, 00270 Array<Array<T> >& incoming, const MPIComm& comm) 00271 { 00272 int numProcs = comm.getNProc(); 00273 00274 // catch degenerate case 00275 if (numProcs==1) 00276 { 00277 incoming = outgoing; 00278 return; 00279 } 00280 00281 int* sendMesgLength = new int[numProcs]; 00282 TEST_FOR_EXCEPTION(sendMesgLength==0, 00283 std::runtime_error, "failed to allocate sendMesgLength"); 00284 int* recvMesgLength = new int[numProcs]; 00285 TEST_FOR_EXCEPTION(recvMesgLength==0, 00286 std::runtime_error, "failed to allocate recvMesgLength"); 00287 00288 int p = 0; 00289 for (p=0; p<numProcs; p++) 00290 { 00291 sendMesgLength[p] = outgoing[p].length(); 00292 } 00293 00294 comm.allToAll(sendMesgLength, 1, MPIComm::INT, 00295 recvMesgLength, 1, MPIComm::INT); 00296 00297 00298 int totalSendLength = 0; 00299 int totalRecvLength = 0; 00300 for (p=0; p<numProcs; p++) 00301 { 00302 totalSendLength += sendMesgLength[p]; 00303 totalRecvLength += recvMesgLength[p]; 00304 } 00305 00306 T* sendBuf = new T[totalSendLength]; 00307 TEST_FOR_EXCEPTION(sendBuf==0, 00308 std::runtime_error, "failed to allocate sendBuf"); 00309 T* recvBuf = new T[totalRecvLength]; 00310 TEST_FOR_EXCEPTION(recvBuf==0, 00311 std::runtime_error, "failed to allocate recvBuf"); 00312 00313 int* sendDisp = new int[numProcs]; 00314 TEST_FOR_EXCEPTION(sendDisp==0, 00315 std::runtime_error, "failed to allocate sendDisp"); 00316 int* recvDisp = new int[numProcs]; 00317 TEST_FOR_EXCEPTION(recvDisp==0, 00318 std::runtime_error, "failed to allocate recvDisp"); 00319 00320 int count = 0; 00321 sendDisp[0] = 0; 00322 recvDisp[0] = 0; 00323 00324 for (p=0; p<numProcs; p++) 00325 { 00326 for (int i=0; i<outgoing[p].length(); i++) 00327 { 00328 sendBuf[count] = outgoing[p][i]; 00329 count++; 00330 } 00331 if (p>0) 00332 { 00333 sendDisp[p] = sendDisp[p-1] + sendMesgLength[p-1]; 00334 recvDisp[p] = recvDisp[p-1] + recvMesgLength[p-1]; 00335 } 00336 } 00337 00338 comm.allToAllv(sendBuf, sendMesgLength, 00339 sendDisp, MPITraits<T>::type(), 00340 recvBuf, recvMesgLength, 00341 recvDisp, MPITraits<T>::type()); 00342 00343 incoming.resize(numProcs); 00344 for (p=0; p<numProcs; p++) 00345 { 00346 incoming[p].resize(recvMesgLength[p]); 00347 for (int i=0; i<recvMesgLength[p]; i++) 00348 { 00349 incoming[p][i] = recvBuf[recvDisp[p] + i]; 00350 } 00351 } 00352 00353 delete [] sendBuf; 00354 delete [] sendMesgLength; 00355 delete [] sendDisp; 00356 delete [] recvBuf; 00357 delete [] recvMesgLength; 00358 delete [] recvDisp; 00359 } 00360 00361 template <class T> inline 00362 void MPIContainerComm<T>::allGather(const T& outgoing, Array<T>& incoming, 00363 const MPIComm& comm) 00364 { 00365 int nProc = comm.getNProc(); 00366 incoming.resize(nProc); 00367 00368 if (nProc==1) 00369 { 00370 incoming[0] = outgoing; 00371 } 00372 else 00373 { 00374 comm.allGather((void*) &outgoing, 1, MPITraits<T>::type(), 00375 (void*) &(incoming[0]), 1, MPITraits<T>::type()); 00376 } 00377 } 00378 00379 template <class T> inline 00380 void MPIContainerComm<T>::accumulate(const T& localValue, Array<T>& sums, 00381 T& total, 00382 const MPIComm& comm) 00383 { 00384 Array<T> contributions; 00385 allGather(localValue, contributions, comm); 00386 sums.resize(comm.getNProc()); 00387 sums[0] = 0; 00388 total = contributions[0]; 00389 00390 for (int i=0; i<comm.getNProc()-1; i++) 00391 { 00392 total += contributions[i+1]; 00393 sums[i+1] = sums[i] + contributions[i]; 00394 } 00395 } 00396 00397 00398 00399 00400 template <class T> inline 00401 void MPIContainerComm<T>::getBigArray(const Array<Array<T> >& x, Array<T>& bigArray, 00402 Array<int>& offsets) 00403 { 00404 offsets.resize(x.length()+1); 00405 int totalLength = 0; 00406 00407 for (int i=0; i<x.length(); i++) 00408 { 00409 offsets[i] = totalLength; 00410 totalLength += x[i].length(); 00411 } 00412 offsets[x.length()] = totalLength; 00413 00414 bigArray.resize(totalLength); 00415 00416 for (int i=0; i<x.length(); i++) 00417 { 00418 for (int j=0; j<x[i].length(); j++) 00419 { 00420 bigArray[offsets[i]+j] = x[i][j]; 00421 } 00422 } 00423 } 00424 00425 template <class T> inline 00426 void MPIContainerComm<T>::getSmallArrays(const Array<T>& bigArray, 00427 const Array<int>& offsets, 00428 Array<Array<T> >& x) 00429 { 00430 x.resize(offsets.length()-1); 00431 for (int i=0; i<x.length(); i++) 00432 { 00433 x[i].resize(offsets[i+1]-offsets[i]); 00434 for (int j=0; j<x[i].length(); j++) 00435 { 00436 x[i][j] = bigArray[offsets[i] + j]; 00437 } 00438 } 00439 } 00440 00441 00442 #ifndef DOXYGEN_SHOULD_SKIP_THIS 00443 00444 /* --------------- std::string specializations --------------------- */ 00445 00446 inline void MPIContainerComm<std::string>::bcast(std::string& x, 00447 int src, const MPIComm& comm) 00448 { 00449 int len = x.length(); 00450 MPIContainerComm<int>::bcast(len, src, comm); 00451 00452 x.resize(len); 00453 comm.bcast((void*)&(x[0]), len, MPITraits<char>::type(), src); 00454 } 00455 00456 00457 inline void MPIContainerComm<std::string>::bcast(Array<std::string>& x, int src, 00458 const MPIComm& comm) 00459 { 00460 /* begin by packing all the data into a big char array. This will 00461 * take a little time, but will be cheaper than multiple MPI calls */ 00462 Array<char> bigArray; 00463 Array<int> offsets; 00464 if (comm.getRank()==src) 00465 { 00466 getBigArray(x, bigArray, offsets); 00467 } 00468 00469 /* now broadcast the big array and the offsets */ 00470 MPIContainerComm<char>::bcast(bigArray, src, comm); 00471 MPIContainerComm<int>::bcast(offsets, src, comm); 00472 00473 /* finally, reassemble the array of strings */ 00474 if (comm.getRank() != src) 00475 { 00476 getStrings(bigArray, offsets, x); 00477 } 00478 } 00479 00480 inline void MPIContainerComm<std::string>::bcast(Array<Array<std::string> >& x, 00481 int src, const MPIComm& comm) 00482 { 00483 int len = x.length(); 00484 MPIContainerComm<int>::bcast(len, src, comm); 00485 00486 x.resize(len); 00487 for (int i=0; i<len; i++) 00488 { 00489 MPIContainerComm<std::string>::bcast(x[i], src, comm); 00490 } 00491 } 00492 00493 00494 inline void MPIContainerComm<std::string>::allGather(const std::string& outgoing, 00495 Array<std::string>& incoming, 00496 const MPIComm& comm) 00497 { 00498 int nProc = comm.getNProc(); 00499 00500 int sendCount = outgoing.length(); 00501 00502 incoming.resize(nProc); 00503 00504 int* recvCounts = new int[nProc]; 00505 int* recvDisplacements = new int[nProc]; 00506 00507 /* share lengths with all procs */ 00508 comm.allGather((void*) &sendCount, 1, MPIComm::INT, 00509 (void*) recvCounts, 1, MPIComm::INT); 00510 00511 00512 int recvSize = 0; 00513 recvDisplacements[0] = 0; 00514 for (int i=0; i<nProc; i++) 00515 { 00516 recvSize += recvCounts[i]; 00517 if (i < nProc-1) 00518 { 00519 recvDisplacements[i+1] = recvDisplacements[i]+recvCounts[i]; 00520 } 00521 } 00522 00523 char* recvBuf = new char[recvSize]; 00524 00525 comm.allGatherv((void*) outgoing.c_str(), sendCount, MPIComm::CHAR, 00526 recvBuf, recvCounts, recvDisplacements, MPIComm::CHAR); 00527 00528 for (int j=0; j<nProc; j++) 00529 { 00530 char* start = recvBuf + recvDisplacements[j]; 00531 char* tmp = new char[recvCounts[j]+1]; 00532 std::memcpy(tmp, start, recvCounts[j]); 00533 tmp[recvCounts[j]] = '\0'; 00534 incoming[j] = std::string(tmp); 00535 delete [] tmp; 00536 } 00537 00538 delete [] recvCounts; 00539 delete [] recvDisplacements; 00540 delete [] recvBuf; 00541 } 00542 00543 inline void MPIContainerComm<std::string>::gatherv(const Array<std::string>& outgoing, 00544 Array<Array<std::string> >& incoming, 00545 int root, 00546 const MPIComm& comm) 00547 { 00548 int nProc = comm.getNProc(); 00549 00550 Array<char> packedLocalArray; 00551 pack(outgoing, packedLocalArray); 00552 00553 int sendCount = packedLocalArray.size(); 00554 00555 /* gather the message sizes from all procs */ 00556 Array<int> recvCounts(nProc); 00557 Array<int> recvDisplacements(nProc); 00558 00559 comm.gather((void*) &sendCount, 1, MPIComm::INT, 00560 (void*) &(recvCounts[0]), 1, MPIComm::INT, root); 00561 00562 /* compute the displacements */ 00563 int recvSize = 0; 00564 if (root == comm.getRank()) 00565 { 00566 recvDisplacements[0] = 0; 00567 for (int i=0; i<nProc; i++) 00568 { 00569 recvSize += recvCounts[i]; 00570 if (i < nProc-1) 00571 { 00572 recvDisplacements[i+1] = recvDisplacements[i]+recvCounts[i]; 00573 } 00574 } 00575 } 00576 00577 /* set the size to 1 on non-root procs */ 00578 Array<char> recvBuf(std::max(1,recvSize)); 00579 00580 00581 void* sendBuf = (void*) &(packedLocalArray[0]); 00582 void* inBuf = (void*) &(recvBuf[0]); 00583 int* inCounts = &(recvCounts[0]); 00584 int* inDisps = &(recvDisplacements[0]); 00585 00586 /* gather the packed data */ 00587 comm.gatherv( sendBuf, sendCount, MPIComm::CHAR, 00588 inBuf, inCounts, inDisps, 00589 MPIComm::CHAR, root); 00590 00591 /* on the root, unpack the data */ 00592 if (comm.getRank()==root) 00593 { 00594 incoming.resize(nProc); 00595 for (int j=0; j<nProc; j++) 00596 { 00597 char* start = &(recvBuf[0]) + recvDisplacements[j]; 00598 Array<char> tmp(recvCounts[j]+1); 00599 std::memcpy(&(tmp[0]), start, recvCounts[j]); 00600 tmp[recvCounts[j]] = '\0'; 00601 unpack(tmp, incoming[j]); 00602 } 00603 } 00604 00605 00606 } 00607 00608 00609 inline void MPIContainerComm<std::string>::getBigArray(const Array<std::string>& x, 00610 Array<char>& bigArray, 00611 Array<int>& offsets) 00612 { 00613 offsets.resize(x.length()+1); 00614 int totalLength = 0; 00615 00616 for (int i=0; i<x.length(); i++) 00617 { 00618 offsets[i] = totalLength; 00619 totalLength += x[i].length(); 00620 } 00621 offsets[x.length()] = totalLength; 00622 00623 bigArray.resize(totalLength); 00624 00625 for (int i=0; i<x.length(); i++) 00626 { 00627 for (unsigned int j=0; j<x[i].length(); j++) 00628 { 00629 bigArray[offsets[i]+j] = x[i][j]; 00630 } 00631 } 00632 } 00633 00634 inline void MPIContainerComm<std::string>::pack(const Array<std::string>& x, 00635 Array<char>& bigArray) 00636 { 00637 Array<int> offsets(x.size()+1); 00638 int headerSize = (x.size()+2) * sizeof(int); 00639 00640 int totalLength = headerSize; 00641 00642 for (int i=0; i<x.length(); i++) 00643 { 00644 offsets[i] = totalLength; 00645 totalLength += x[i].length(); 00646 } 00647 offsets[x.length()] = totalLength; 00648 00649 /* The array will be packed as follows: 00650 * [numStrs, offset1, ... offsetN, characters data] 00651 */ 00652 00653 bigArray.resize(totalLength); 00654 00655 int* header = reinterpret_cast<int*>( &(bigArray[0]) ); 00656 header[0] = x.size(); 00657 for (Array<std::string>::size_type i=0; i<=x.size(); i++) 00658 { 00659 header[i+1] = offsets[i]; 00660 } 00661 00662 for (int i=0; i<x.length(); i++) 00663 { 00664 for (unsigned int j=0; j<x[i].length(); j++) 00665 { 00666 bigArray[offsets[i]+j] = x[i][j]; 00667 } 00668 } 00669 } 00670 00671 inline void MPIContainerComm<std::string>::unpack(const Array<char>& packed, 00672 Array<std::string>& x) 00673 { 00674 const int* header = reinterpret_cast<const int*>( &(packed[0]) ); 00675 00676 x.resize(header[0]); 00677 Array<int> offsets(x.size()+1); 00678 for (Array<std::string>::size_type i=0; i<=x.size(); i++) offsets[i] = header[i+1]; 00679 00680 for (Array<std::string>::size_type i=0; i<x.size(); i++) 00681 { 00682 x[i].resize(offsets[i+1]-offsets[i]); 00683 for (std::string::size_type j=0; j<x[i].length(); j++) 00684 { 00685 x[i][j] = packed[offsets[i] + j]; 00686 } 00687 } 00688 } 00689 00690 inline void MPIContainerComm<std::string>::getStrings(const Array<char>& bigArray, 00691 const Array<int>& offsets, 00692 Array<std::string>& x) 00693 { 00694 x.resize(offsets.length()-1); 00695 for (int i=0; i<x.length(); i++) 00696 { 00697 x[i].resize(offsets[i+1]-offsets[i]); 00698 for (unsigned int j=0; j<x[i].length(); j++) 00699 { 00700 x[i][j] = bigArray[offsets[i] + j]; 00701 } 00702 } 00703 } 00704 #endif // DOXYGEN_SHOULD_SKIP_THIS 00705 00706 } 00707 00708 00709 #endif 00710 00711
1.7.4