|
IterationPack: General framework for building iterative algorithms Version of the Day
|
00001 // @HEADER 00002 // *********************************************************************** 00003 // 00004 // Moocho: Multi-functional Object-Oriented arCHitecture for Optimization 00005 // Copyright (2003) Sandia Corporation 00006 // 00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive 00008 // license for use of this work by or on behalf of the U.S. Government. 00009 // 00010 // This library is free software; you can redistribute it and/or modify 00011 // it under the terms of the GNU Lesser General Public License as 00012 // published by the Free Software Foundation; either version 2.1 of the 00013 // License, or (at your option) any later version. 00014 // 00015 // This library is distributed in the hope that it will be useful, but 00016 // WITHOUT ANY WARRANTY; without even the implied warranty of 00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00018 // Lesser General Public License for more details. 00019 // 00020 // You should have received a copy of the GNU Lesser General Public 00021 // License along with this library; if not, write to the Free Software 00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 00023 // USA 00024 // Questions? Contact Roscoe A. Bartlett (rabartl@sandia.gov) 00025 // 00026 // *********************************************************************** 00027 // @HEADER 00028 00029 #include <signal.h> 00030 00031 #include <iterator> 00032 #include <numeric> 00033 00034 #include "IterationPack_Algorithm.hpp" 00035 #include "StopWatchPack_stopwatch.hpp" 00036 #include "Teuchos_TestForException.hpp" 00037 #include "Teuchos_TypeNameTraits.hpp" 00038 #include "Teuchos_GlobalMPISession.hpp" 00039 00040 #ifdef HAVE_MPI 00041 #include "mpi.h" 00042 #endif 00043 00044 // Define to see MPI/interrupt deugging output 00045 //#define ITERATION_PACK_ALGORITHM_SHOW_MPI_DEBUG_INFO 00046 00047 // Define of the MPI implementation receives signals on all processes 00048 //#define ITERATION_PACK_ALGORITHM_SIGNALS_ON_ALL_PROCESSES; 00049 00050 extern "C" { 00051 00052 void sig_handler_interrupt_algorithm( int signum ) 00053 { 00054 IterationPack::Algorithm::interrupt(); 00055 } 00056 00057 } // extern "C" 00058 00059 namespace { 00060 00061 // Helper functions 00062 00063 template< class T > 00064 inline 00065 T my_max( const T& v1, const T& v2 ) { return v1 > v2 ? v1 : v2; } 00066 00067 // Private static data for IterationPack::Algorithm. 00068 // I put it here so that I can modify it without affecting the 00069 // header file and avoiding unnecessary recompilations. 00070 00071 enum EInterruptStatus { NOT_INTERRUPTED=0, STOP_END_STEP=1, STOP_END_ITER=2, ABORT_PROGRAM=3 }; 00072 00073 int static_mpi_initialized = false; 00074 int static_num_running_algorithms = 0; 00075 int static_num_proc = 0; // Flag that no algorithm has been even allocated yet! 00076 int static_proc_rank = 0; 00077 bool static_interrupt_called = false; 00078 bool static_processed_user_interrupt = false; 00079 EInterruptStatus static_interrupt_status = NOT_INTERRUPTED; 00080 bool static_interrupt_terminate_return = false; 00081 00082 } // end namespace 00083 00084 // ToDo: change step_itr and assoc_step_itr to just return iterators without 00085 // asserting if the names exist. This will be more useful. 00086 00087 namespace IterationPack { 00088 00089 // constructors / destructor 00090 00091 Algorithm::Algorithm() 00092 :running_state_(NOT_RUNNING), max_iter_(100) 00093 ,max_run_time_(std::numeric_limits<double>::max()) 00094 ,next_step_name_(0), do_step_next_called_(false), reconfigured_(false) 00095 ,time_stats_computed_(false) 00096 { 00097 // Set MPI info 00098 static_num_proc = 1; 00099 static_proc_rank = 0; 00100 #ifdef HAVE_MPI 00101 // If MPI is not initialized then this must be because the code was 00102 // compiled with support for MPI but it not actually using it. 00103 // Therefore, we will initialize MPI but not bother to finialize it. 00104 if(!static_mpi_initialized) { 00105 int mpi_initialized = false; 00106 MPI_Initialized(&mpi_initialized); 00107 if(!mpi_initialized) { 00108 int argc = 1; 00109 char arg_str[] = "dummy_prg"; 00110 char *arg_str_ptr = arg_str; 00111 char **argv = &arg_str_ptr; 00112 MPI_Init( &argc, &argv ); 00113 } 00114 static_mpi_initialized = true; 00115 } 00116 // ToDo: Allow the specification of another communicator if needed! 00117 MPI_Comm_size( MPI_COMM_WORLD, &static_num_proc ); 00118 MPI_Comm_rank( MPI_COMM_WORLD, &static_proc_rank ); 00119 #ifdef ITERATION_PACK_ALGORITHM_SHOW_MPI_DEBUG_INFO 00120 std::cerr << "\np=" << static_proc_rank << ": Algorithm::Algorithm() being called (num_proc = "<<static_num_proc<<") ... \n"; 00121 #endif 00122 #endif // HAVE_MPI 00123 } 00124 00125 Algorithm::~Algorithm() 00126 {} 00127 00128 // maximum iterations 00129 00130 void Algorithm::max_iter(size_t max_iter) 00131 { max_iter_ = max_iter; } 00132 00133 size_t Algorithm::max_iter() const 00134 { return max_iter_; } 00135 00136 // maximum run tine 00137 00138 void Algorithm::max_run_time(double max_run_time) 00139 { max_run_time_ = max_run_time; } 00140 00141 double Algorithm::max_run_time() const 00142 { return max_run_time_; } 00143 00144 00145 // step information / access 00146 00147 int Algorithm::num_steps() const 00148 { return steps_.size(); } 00149 00150 Algorithm::poss_type Algorithm::get_step_poss(const std::string& step_name) const 00151 { 00152 steps_t::const_iterator itr = step_itr(step_name); 00153 return itr == steps_.end() ? DOES_NOT_EXIST : std::distance( steps_.begin(), itr ) + 1; 00154 } 00155 00156 const std::string& Algorithm::get_step_name(poss_type step_poss) const 00157 { return steps_[validate(step_poss) - 1].name; } 00158 00159 Algorithm::step_ptr_t& Algorithm::get_step(poss_type step_poss) 00160 { return steps_[validate(step_poss) - 1].step_ptr; } 00161 00162 const Algorithm::step_ptr_t& Algorithm::get_step(poss_type step_poss) const 00163 { return steps_[validate(step_poss) - 1].step_ptr; } 00164 00165 // pre/post step information / access 00166 00167 int Algorithm::num_assoc_steps(poss_type step_poss, EAssocStepType type) const 00168 { return assoc_steps_[validate(step_poss) - 1][type].size(); } 00169 00170 Algorithm::poss_type Algorithm::get_assoc_step_poss(poss_type step_poss, EAssocStepType type 00171 ,const std::string& assoc_step_name) const 00172 { 00173 // ToDo: change to return DOES_NOT_EXIST if it does not exist. 00174 const assoc_steps_ele_list_t &assoc_list = assoc_steps_[validate(step_poss) - 1][type]; 00175 assoc_steps_ele_list_t::const_iterator itr = assoc_step_itr(assoc_list,assoc_step_name); 00176 return itr == assoc_list.end() ? DOES_NOT_EXIST : std::distance( assoc_list.begin() , itr ) + 1; 00177 } 00178 00179 const std::string& Algorithm::get_assoc_step_name(poss_type step_poss, EAssocStepType type 00180 , poss_type assoc_step_poss) const 00181 { 00182 const assoc_steps_ele_list_t &assoc_list= assoc_steps_[validate(step_poss) - 1][type]; 00183 validate(assoc_list,assoc_step_poss); 00184 assoc_steps_ele_list_t::const_iterator itr = assoc_list.begin(); 00185 std::advance( itr, assoc_step_poss - 1 ); 00186 return (*itr).name; 00187 } 00188 00189 Algorithm::step_ptr_t& Algorithm::get_assoc_step(poss_type step_poss, EAssocStepType type 00190 , poss_type assoc_step_poss) 00191 { 00192 assoc_steps_ele_list_t &assoc_list= assoc_steps_[validate(step_poss) - 1][type]; 00193 validate(assoc_list,assoc_step_poss); 00194 assoc_steps_ele_list_t::iterator itr = assoc_list.begin(); 00195 std::advance( itr, assoc_step_poss - 1 ); 00196 return (*itr).step_ptr; 00197 } 00198 00199 const Algorithm::step_ptr_t& Algorithm::get_assoc_step(poss_type step_poss, EAssocStepType type 00200 , poss_type assoc_step_poss) const 00201 { 00202 const assoc_steps_ele_list_t &assoc_list= assoc_steps_[validate(step_poss) - 1][type]; 00203 validate(assoc_list,assoc_step_poss); 00204 assoc_steps_ele_list_t::const_iterator itr = assoc_list.begin(); 00205 std::advance( itr, assoc_step_poss - 1 ); 00206 return (*itr).step_ptr; 00207 } 00208 00209 // step manipulation 00210 00211 void Algorithm::insert_step(poss_type step_poss, const std::string& step_name, const step_ptr_t& step) 00212 { 00213 validate_not_in_state(RUNNING); 00214 TEST_FOR_EXCEPTION( 00215 step.get() == NULL, std::invalid_argument 00216 ,"Algorithm::insert_step(...) : A step with the name = \'" << step_name 00217 << "\' being inserted into the position = " << step_poss 00218 << " has step.get() == NULL!" ); 00219 // Make sure a step with this name does not already exist. 00220 steps_t::iterator itr; 00221 if( steps_.end() != ( itr = step_itr(step_name) ) ) 00222 TEST_FOR_EXCEPTION( 00223 true, AlreadyExists 00224 ,"Algorithm::insert_step(...) : A step with the name = " << step_name 00225 << " already exists at step_poss = " << std::distance(steps_.begin(),itr) + 1 ); 00226 // insert the step in such a way that any container can be used for steps_ 00227 itr = steps_.begin(); 00228 std::advance ( itr , validate(step_poss,+1) - 1 ); 00229 steps_.insert( itr , steps_ele_t(step,step_name) ); 00230 // insert the assoc_step element in such a way that any container can be used for assoc_steps_ 00231 assoc_steps_t::iterator a_itr = assoc_steps_.begin(); 00232 std::advance ( a_itr , step_poss - 1 ); 00233 assoc_steps_.insert( a_itr , assoc_steps_ele_t() ); 00234 } 00235 00236 void Algorithm::change_step_name(poss_type step_poss, const std::string& new_name) 00237 { 00238 validate_not_in_state(RUNNING); 00239 if(running_state() == RUNNING_BEING_CONFIGURED) { 00240 validate_not_curr_step(validate(step_poss)); 00241 validate_not_next_step(steps_[step_poss - 1].name); 00242 } 00243 steps_[step_poss - 1].name = new_name; 00244 } 00245 00246 void Algorithm::replace_step(poss_type step_poss, const step_ptr_t& step) 00247 { 00248 validate_not_in_state(RUNNING); 00249 if(running_state() == RUNNING_BEING_CONFIGURED) validate_not_curr_step(validate(step_poss)); 00250 steps_[step_poss - 1].step_ptr = step; 00251 } 00252 00253 void Algorithm::remove_step(poss_type step_poss) 00254 { 00255 validate_not_in_state(RUNNING); 00256 if(running_state() == RUNNING_BEING_CONFIGURED) { 00257 validate_not_curr_step(validate(step_poss)); 00258 validate_not_next_step(steps_[step_poss - 1].name); 00259 } 00260 // remove the step in such a way that any container can be used for steps_ 00261 steps_t::iterator itr = steps_.begin(); 00262 std::advance ( itr , validate(step_poss) - 1 ); 00263 steps_.erase( itr ); 00264 // remove the assoc_step element in such a way that any container can be used for assoc_steps_ 00265 assoc_steps_t::iterator a_itr = assoc_steps_.begin(); 00266 std::advance ( a_itr , step_poss - 1 ); 00267 assoc_steps_.erase( a_itr ); 00268 } 00269 00270 // pre/post step manipulation 00271 00272 void Algorithm::insert_assoc_step(poss_type step_poss, EAssocStepType type, poss_type assoc_step_poss 00273 , const std::string& assoc_step_name, const step_ptr_t& assoc_step) 00274 { 00275 validate_not_in_state(RUNNING); 00276 TEST_FOR_EXCEPTION( 00277 assoc_step.get() == NULL, std::invalid_argument 00278 ,"Algorithm::insert_assoc_step(...) : A step with the name = \'" << assoc_step_name 00279 << "\' being inserted into the position = " << step_poss 00280 << "." << ( type == PRE_STEP 00281 ? (int)assoc_step_poss - num_assoc_steps(step_poss,type) - 1 00282 : assoc_step_poss ) 00283 << " has assoc_step.get() == NULL!" ); 00284 if(running_state() == RUNNING_BEING_CONFIGURED) validate_not_curr_step(validate(step_poss)); 00285 // Make sure an associated step with this name does not already exist. 00286 assoc_steps_ele_list_t &assoc_list = assoc_steps_[step_poss - 1][type]; 00287 validate(assoc_list,assoc_step_poss,+1); 00288 assoc_steps_ele_list_t::iterator itr = assoc_list.begin(); 00289 char assoc_type_name[2][10] = { "PRE_STEP" , "POST_STEP" }; 00290 if( assoc_list.end() != ( itr = assoc_step_itr(assoc_list,assoc_step_name) ) ) 00291 TEST_FOR_EXCEPTION( 00292 true, AlreadyExists 00293 ,"Algorithm::insert_assoc_step(...) : An associated step of type = " 00294 << assoc_type_name[type] 00295 << " with the name = " << assoc_step_name 00296 << " already exists at step_poss = " << step_poss 00297 << " and assoc_step_poss = " << std::distance(assoc_list.begin(),itr) + 1 ); 00298 // insert an associated step in such a way that any container could be used. 00299 itr = assoc_list.begin(); 00300 std::advance( itr, assoc_step_poss - 1 ); 00301 assoc_list.insert( itr , assoc_steps_ele_list_ele_t(assoc_step,assoc_step_name) ); 00302 } 00303 00304 void Algorithm::remove_assoc_step(poss_type step_poss, EAssocStepType type, poss_type assoc_step_poss) 00305 { 00306 validate_not_in_state(RUNNING); 00307 if(running_state() == RUNNING_BEING_CONFIGURED) validate_not_curr_step(validate(step_poss)); 00308 validate(step_poss); 00309 assoc_steps_ele_list_t &assos_list = assoc_steps_[step_poss - 1][type]; 00310 validate(assos_list,assoc_step_poss); 00311 assoc_steps_ele_list_t::iterator itr = assos_list.begin(); 00312 std::advance( itr, assoc_step_poss - 1 ); 00313 assos_list.erase( itr ); 00314 } 00315 00316 // runtime configuration updating control 00317 00318 void Algorithm::begin_config_update() 00319 { 00320 validate_in_state(RUNNING); 00321 saved_next_step_name_ = *next_step_name_; 00322 saved_curr_step_name_ = steps_[curr_step_poss_ - 1].name; 00323 change_running_state(RUNNING_BEING_CONFIGURED); 00324 } 00325 00326 void Algorithm::end_config_update() 00327 { 00328 validate_in_state(RUNNING_BEING_CONFIGURED); 00329 00330 // update next_step_poss_ and next_step_name_. 00331 steps_t::iterator itr = step_itr(saved_next_step_name_); 00332 TEST_FOR_EXCEPT( !( itr != steps_.end() ) ); // the step with this name should not have been deleted or changed. 00333 next_step_poss_ = std::distance( steps_.begin() , itr ) + 1; 00334 next_step_name_ = &(*itr).name; 00335 00336 // update curr_step_poss_ 00337 itr = step_itr(saved_curr_step_name_); 00338 TEST_FOR_EXCEPT( !( itr != steps_.end() ) ); // the step with this name should not have been deleted or changed. 00339 curr_step_poss_ = std::distance( steps_.begin() , itr ) + 1; 00340 00341 // inform the step objects that *this has changes. 00342 imp_inform_steps( &AlgorithmStep::inform_updated ); 00343 00344 change_running_state(RUNNING); 00345 reconfigured_ = true; 00346 } 00347 00348 // algorithmic control 00349 00350 void Algorithm::do_step_next(const std::string& step_name) 00351 { 00352 validate_in_state(RUNNING); 00353 steps_t::iterator itr = step_itr_and_assert(step_name); 00354 next_step_poss_ = std::distance( steps_.begin() , itr ) + 1; 00355 next_step_name_ = &(*itr).name; 00356 do_step_next_called_ = true; 00357 } 00358 00359 void Algorithm::do_step_next(poss_type step_poss) 00360 { 00361 validate_in_state(RUNNING); 00362 const steps_ele_t &ele = steps_[validate(step_poss) - 1]; 00363 next_step_poss_ = step_poss; 00364 next_step_name_ = &ele.name; 00365 do_step_next_called_ = true; 00366 } 00367 00368 const std::string& Algorithm::what_is_next_step_name() const 00369 { 00370 validate_in_state(RUNNING); 00371 return *next_step_name_; 00372 } 00373 00374 Algorithm::poss_type Algorithm::what_is_next_step_poss() const 00375 { 00376 validate_in_state(RUNNING); 00377 return next_step_poss_; 00378 } 00379 00380 bool Algorithm::do_step(const std::string& step_name) 00381 { 00382 validate_in_state(RUNNING); 00383 return imp_do_step( std::distance( steps_.begin() , step_itr_and_assert(step_name) ) + 1 ); 00384 } 00385 00386 bool Algorithm::do_step(poss_type step_poss) 00387 { 00388 validate_in_state(RUNNING); 00389 return imp_do_step(step_poss); 00390 } 00391 00392 void Algorithm::terminate(bool success) 00393 { 00394 validate_in_state(RUNNING); 00395 terminate_status_ = success ? STATUS_TERMINATE_TRUE : STATUS_TERMINATE_FALSE; 00396 } 00397 00398 // start iterations 00399 00400 EAlgoReturn Algorithm::do_algorithm(poss_type step_poss) 00401 { 00402 using StopWatchPack::stopwatch; 00403 00404 validate_in_state(NOT_RUNNING); 00405 00406 track().initialize(); 00407 00408 try{ 00409 00410 terminate_status_ = STATUS_KEEP_RUNNING; 00411 change_running_state(RUNNING); 00412 00413 first_k_ = state().k(); 00414 next_step_poss_ = validate(step_poss); 00415 next_step_name_ = &steps_[step_poss - 1].name; 00416 00417 // Prepair for timing algorithm 00418 step_times_.resize( algo_timing_ ? (num_steps()+1) * (max_iter()+1+NUM_STEP_TIME_STATS) : 0 ); 00419 if( algo_timing_ ) { 00420 // step_times_[ max_iter() ] = 0.0; // flag for statistics not calc. yet. 00421 // // set iteration totals to zero 00422 // if( step_times_[(max_iter() + 1 + 5) * num_steps()] != 0.0 ) 00423 // std::fill_n( step_times_.begin() + (max_iter() + 1 + 5) * num_steps(), max_iter(), 0.0 ); 00424 std::fill_n( step_times_.begin(), step_times_.size(), 0.0 ); // Try setting everything to zero? 00425 time_stats_computed_ = false; 00426 } 00427 stopwatch step_timer; 00428 stopwatch overall_timer; 00429 00430 imp_inform_steps( &AlgorithmStep::initialize_step ); 00431 00432 overall_timer.start(); 00433 for(;;) { 00434 00435 curr_step_poss_ = next_step_poss_; 00436 // Note that curr_step_poss_ may change if there is a runtime 00437 // change in the configuration of the steps. 00438 00439 bool keep_on = true; 00440 00441 // Execute the steps for this step 00442 00443 if( algo_timing_ ) { 00444 step_timer.reset(); 00445 step_timer.start(); 00446 } 00447 00448 keep_on = imp_do_step(curr_step_poss_); 00449 00450 if( algo_timing_ ) { 00451 const double time = my_max(step_timer.stop(),-1e-50); // negative somehow (g++ -O2 ?) 00452 // time for step k for the iteration 00453 step_times_[state().k()-first_k_+(curr_step_poss_-1)*(max_iter()+1+NUM_STEP_TIME_STATS)] = time; 00454 // Add to time for the full iteration 00455 step_times_[state().k()-first_k_+(num_steps())*(max_iter()+1+NUM_STEP_TIME_STATS)] += time; 00456 } 00457 00458 // See if a step object called terminate(...) 00459 if(terminate_status_ != STATUS_KEEP_RUNNING) { 00460 EAlgoReturn algo_return; 00461 if( static_interrupt_status == STOP_END_STEP ) { 00462 algo_return = ( terminate_status_ == STATUS_TERMINATE_TRUE 00463 ? INTERRUPTED_TERMINATE_TRUE 00464 : INTERRUPTED_TERMINATE_FALSE ); 00465 static_interrupt_status = NOT_INTERRUPTED; 00466 } 00467 else { 00468 algo_return = ( terminate_status_ == STATUS_TERMINATE_TRUE 00469 ? TERMINATE_TRUE 00470 : TERMINATE_FALSE ); 00471 } 00472 return finalize_algorithm(algo_return); 00473 } 00474 00475 if(keep_on) { 00476 00477 // All the step objects returned true so increment the step and loop around 00478 00479 if( curr_step_poss_ == static_cast<poss_type>(num_steps()) ) { 00480 00481 // 00482 // This is the last step in the algorithm 00483 // 00484 00485 // Output this iteration 00486 track().output_iteration(*this); 00487 00488 // Check if the maximum number of iterations has been exceeded. 00489 if( state().k() - first_k_ >= max_iter() ) { 00490 return finalize_algorithm(MAX_ITER_EXCEEDED); 00491 } 00492 00493 // Check if the maximum runtime has been exceeded. 00494 if( ( overall_timer.read() / 60 ) >= max_run_time() ) { 00495 return finalize_algorithm(MAX_RUN_TIME_EXCEEDED); 00496 } 00497 00498 // Set if the algorithm was interrupted 00499 if( static_interrupt_status == STOP_END_ITER ) { 00500 static_interrupt_status = NOT_INTERRUPTED; 00501 const EAlgoReturn algo_return = ( static_interrupt_terminate_return 00502 ? INTERRUPTED_TERMINATE_TRUE 00503 : INTERRUPTED_TERMINATE_FALSE ); 00504 return finalize_algorithm(algo_return); 00505 } 00506 00507 // Transition the iteration quantities to k = k + 1 00508 state().next_iteration(); 00509 00510 // Setup to start the major loop over again 00511 next_step_poss_ = 1; 00512 next_step_name_ = &steps_[0].name; 00513 00514 } 00515 else { 00516 00517 // else just increment the step 00518 ++next_step_poss_; 00519 next_step_name_ = &steps_[next_step_poss_ - 1].name; 00520 00521 } 00522 00523 continue; // loop around 00524 00525 } 00526 else { 00527 // some step object returned false from its do_step(..) operation so it 00528 // should have called do_step_next(...) to request a jump to 00529 // a specific operation. 00530 if(!do_step_next_called_) 00531 TEST_FOR_EXCEPTION( 00532 true, InvalidControlProtocal 00533 ,"EAlgoReturn Algorithm::do_algorithm(...) :" 00534 " A step object returned false from its do_step(...) operation" 00535 " without calling do_step_next(...) to request jump to a specific" 00536 " step." ); 00537 do_step_next_called_ = false; 00538 // just loop around and do the step that the step object requested 00539 // by changing next_step_poss_ by its call to do_step_next(...). 00540 } 00541 } // end for(;;) 00542 00543 } // end try 00544 catch(...) { 00545 try { 00546 finalize_algorithm(TERMINATE_FALSE); 00547 } 00548 catch(...) { 00549 // We tried to finalize gracefully but we failed! 00550 } 00551 throw; 00552 } 00553 } 00554 00555 // algorithm information output 00556 00557 void Algorithm::print_steps(std::ostream& out) const 00558 { 00559 out << "\n*** Algorithm Steps ***\n\n"; 00560 imp_print_algorithm(out,false); 00561 out << std::endl; 00562 } 00563 00564 void Algorithm::print_algorithm(std::ostream& out) const 00565 { 00566 out << "\n*** Iteration Quantities ***\n\n"; 00567 state().dump_iter_quant(out); 00568 out << std::endl; 00569 out << "\n*** Algorithm Description ***\n\n"; 00570 imp_print_algorithm(out,true); 00571 out << std::endl; 00572 } 00573 00574 // Algorithm Timing. 00575 00576 void Algorithm::set_algo_timing( bool algo_timing ) { 00577 validate_not_in_state(RUNNING); 00578 algo_timing_ = algo_timing; 00579 } 00580 00581 bool Algorithm::algo_timing() const { 00582 return algo_timing_; 00583 } 00584 00585 void Algorithm::print_algorithm_times( std::ostream& out ) const 00586 { 00587 using std::setw; 00588 using std::endl; 00589 00590 validate_not_in_state(RUNNING); 00591 00592 if( step_times_.size() == 0 ) { 00593 out << "No step timing was performed\n"; 00594 return; 00595 } 00596 00597 const int w = 10; 00598 const int prec = 4; 00599 const int n = num_steps(); // Total steps 00600 const int m = state().k() - first_k_ + 1; // Total number of iterations performed 00601 const int mm = max_iter()+1; // Total number of possible iterations 00602 const int mmm = mm + NUM_STEP_TIME_STATS; // total entries in a step_i row 00603 00604 // Print the header. 00605 out << "\n\n**************************************\n" 00606 << "*** Algorithm step CPU times (sec) ***\n"; 00607 00608 // Print the step names. 00609 out << "\nStep names" 00610 << "\n----------\n"; 00611 {for( int i = 1; i <= n; ++i ) { 00612 out << i << ") \"" << get_step_name(i) << "\"\n"; 00613 }} 00614 out << n+1 << ") Iteration total\n"; 00615 out << endl; 00616 00617 out << std::right << std::setprecision(prec); 00618 00619 // Print table header 00620 out << setw(w) << "" << " steps 1..." << n+1 << " ->\n\n"; 00621 00622 // print step numbers 00623 out << setw(w) << " iter k"; 00624 {for( int i = 1; i <= n+1; ++i ) { 00625 out << setw(w) << i; 00626 }} 00627 out << endl; 00628 out << setw(w) << "--------"; 00629 {for( int i = 1; i <= n+1; ++i ) { 00630 out << setw(w) << "--------"; 00631 }} 00632 out << endl; 00633 // Print the step times. 00634 {for( int k = 0; k < m; ++k ) { 00635 out << setw(w) << first_k_ + k; 00636 {for( int i = 0; i < n+1; ++i ) { 00637 out << setw(w) << step_times_[k+i*mmm]; 00638 }} 00639 out << endl; 00640 }} 00641 00642 // Compute the (1) totals for each step, the (2) average, (3) min and (4) max times 00643 // per iteration for each step and the (5) precentages for each step. 00644 00645 compute_final_time_stats(); 00646 00647 // Ouput time statistics. 00648 00649 out << setw(w) << "--------"; 00650 {for( int i = 1; i <= n+1; ++i ) { 00651 out << setw(w) << "--------"; 00652 }} 00653 00654 // Output the total times for each step. 00655 out << endl; 00656 out << setw(w) << "total(sec)"; 00657 {for( int i = 0; i < n+1; ++i ) { 00658 const double *step_i_times = &step_times_[i*mmm]; 00659 out << setw(w) << step_i_times[ mm + TIME_STAT_TOTALS_OFFSET ]; 00660 }} 00661 out << endl; 00662 00663 // Output the average times per iteration 00664 out << setw(w) << "av(sec)/k"; 00665 {for( int i = 0; i < n+1; ++i ) { 00666 const double *step_i_times = &step_times_[i*mmm]; 00667 out << setw(w) << step_i_times[ mm + TIME_STAT_AV_OFFSET ]; 00668 }} 00669 out << endl; 00670 00671 // Output the min times per iteration 00672 out << setw(w) << "min(sec)"; 00673 {for( int i = 0; i < n+1; ++i ) { 00674 const double *step_i_times = &step_times_[i*mmm]; 00675 out << setw(w) << step_i_times[ mm + TIME_STAT_MIN_OFFSET ]; 00676 }} 00677 out << endl; 00678 00679 // Output the max times per iteration 00680 out << setw(w) << "max(sec)"; 00681 {for( int i = 0; i < n+1; ++i ) { 00682 const double *step_i_times = &step_times_[i*mmm]; 00683 out << setw(w) << step_i_times[ mm + TIME_STAT_MAX_OFFSET ]; 00684 }} 00685 out << endl; 00686 00687 // Output the precentage times for each step. 00688 out << setw(w) << "% total"; 00689 {for( int i = 0; i < n+1; ++i ) { 00690 const double *step_i_times = &step_times_[i*mmm]; 00691 out << setw(w) << step_i_times[ mm + TIME_STAT_PERCENT_OFFSET ] * 100.0; 00692 }} 00693 out << endl; 00694 00695 00696 // Print total time for entire algorithm. 00697 out << "------------------------------" << endl 00698 << "total CPU time = " << total_time_ << " sec\n";; 00699 } 00700 00701 00702 void Algorithm::get_step_times_k( int offset, double step_times[] ) const 00703 { 00704 TEST_FOR_EXCEPTION( 00705 step_times_.size() == 0, std::logic_error 00706 ,"Algorithm::get_step_times_k(...) : times requested, but no times calculated!" 00707 ); 00708 TEST_FOR_EXCEPTION( 00709 offset > 0, std::invalid_argument 00710 ,"Algorithm::get_step_times_k(...) : Can\'t get times for an iteratin that has not occured yet!." 00711 ); 00712 00713 const int n = num_steps(); // Total steps 00714 //const int m = state().k() - first_k_ + 1; // Total number of iterations performed 00715 const int mm = max_iter()+1; // Total number of possible iterations 00716 const int mmm = mm + NUM_STEP_TIME_STATS; // total entries in a step_i row 00717 00718 const int k = state().k() + offset; 00719 {for (int step = 0; step < n+1; ++step) { 00720 step_times[step] = step_times_[step*mmm + k]; 00721 }} 00722 00723 } 00724 00725 void Algorithm::get_final_step_stats( size_t step, double* total, double* average, double* min, double* max, double* percent) const 00726 { 00727 // Compute the (1) totals for each step, the (2) average, (3) min and (4) max times 00728 // per iteration for each step and the (5) precentages for each step. 00729 compute_final_time_stats(); 00730 00731 //const int n = num_steps(); // Total steps 00732 //const int m = state().k() - first_k_ + 1; // Total number of iterations performed 00733 const int mm = max_iter()+1; // Total number of possible iterations 00734 const int mmm = mm + NUM_STEP_TIME_STATS; // total entries in a step_i row 00735 00736 double* step_i_times = &const_cast<step_times_t&>(step_times_)[step*mmm]; 00737 if (total) { 00738 *total = step_i_times[mm + TIME_STAT_TOTALS_OFFSET]; 00739 } 00740 if (average) { 00741 *average = step_i_times[mm + TIME_STAT_AV_OFFSET]; 00742 } 00743 if (min) { 00744 *min = step_i_times[mm + TIME_STAT_MIN_OFFSET]; 00745 } 00746 if (max) { 00747 *max = step_i_times[mm + TIME_STAT_MAX_OFFSET]; 00748 } 00749 if (percent) { 00750 *percent = step_i_times[mm + TIME_STAT_PERCENT_OFFSET]; 00751 } 00752 } 00753 00754 EAlgoReturn Algorithm::finalize_algorithm( EAlgoReturn algo_return ) 00755 { 00756 change_running_state(NOT_RUNNING); 00757 imp_inform_steps( &AlgorithmStep::finalize_step ); 00758 track().output_final(*this,algo_return); 00759 return algo_return; 00760 } 00761 00762 void Algorithm::compute_final_time_stats() const 00763 { 00764 if (!time_stats_computed_) { 00765 time_stats_computed_ = true; 00766 00767 const int n = num_steps(); // Total steps 00768 const int m = state().k() - first_k_ + 1; // Total number of iterations performed 00769 const int mm = max_iter()+1; // Total number of possible iterations 00770 const int mmm = mm + NUM_STEP_TIME_STATS; // total entries in a step_i row 00771 00772 // compute totals for each step (1...n) and the full iteration (n+1) 00773 double &_total_time = const_cast<double&>(total_time_); 00774 _total_time = 0.0; 00775 00776 {for( int i = 0; i < n+1; ++i ) { 00777 double *step_i_times = &const_cast<step_times_t&>(step_times_)[i*mmm]; 00778 // compute total step times (and total algorithm time) 00779 const double 00780 step_time = std::accumulate( step_i_times, step_i_times + m, (double)0.0 ); 00781 if(i < n) 00782 _total_time += step_time; 00783 step_i_times[ mm + TIME_STAT_TOTALS_OFFSET ] = step_time; 00784 // compute average per step. 00785 step_i_times[ mm + TIME_STAT_AV_OFFSET ] = step_time / m; 00786 // compute min per step 00787 step_i_times[ mm + TIME_STAT_MIN_OFFSET ]= *std::min_element( step_i_times, step_i_times + m ); 00788 // compute max per step 00789 step_i_times[ mm + TIME_STAT_MAX_OFFSET ]= *std::max_element( step_i_times, step_i_times + m ); 00790 }} 00791 00792 {for( int i = 0; i < n+1; ++i ) { 00793 double *step_i_times = &const_cast<step_times_t&>(step_times_)[i*mmm]; 00794 // compute fractions for each step. 00795 step_i_times[ mm + TIME_STAT_PERCENT_OFFSET ] 00796 = step_i_times[ mm + TIME_STAT_TOTALS_OFFSET ] / total_time_; 00797 }} 00798 } 00799 } 00800 00801 // private 00802 00803 void Algorithm::change_running_state(ERunningState _running_state) 00804 { 00805 if( running_state() != RUNNING && _running_state == RUNNING ) { 00806 if( static_num_running_algorithms == 0 ) { 00807 // Register the signal handler for the SIGINT 00808 signal( SIGINT, &sig_handler_interrupt_algorithm ); 00809 static_interrupt_called = false; 00810 static_processed_user_interrupt = false; 00811 } 00812 ++static_num_running_algorithms; 00813 } 00814 else if( running_state() != NOT_RUNNING && _running_state == NOT_RUNNING ) { 00815 --static_num_running_algorithms; 00816 if( static_num_running_algorithms == 0 ) { 00817 // Put back the default signal handler 00818 signal( SIGINT, SIG_DFL ); 00819 static_interrupt_called = false; 00820 static_processed_user_interrupt = false; 00821 } 00822 } 00823 running_state_ = _running_state; 00824 } 00825 00826 void Algorithm::validate_in_state(ERunningState _running_state) const { 00827 const char running_state_name[3][25] = { "NOT_RUNNING" , "RUNNING", "RUNNING_BEING_CONFIGURED" }; 00828 if(running_state() != _running_state) 00829 TEST_FOR_EXCEPTION( 00830 true, InvalidRunningState 00831 ,"Algorithm::validate_in_state(...) : The condition running_state() == " 00832 << running_state_name[running_state()] << " has been violated with " 00833 << " running_state = " << running_state_name[_running_state] ); 00834 } 00835 00836 void Algorithm::validate_not_in_state(ERunningState _running_state) const { 00837 const char running_state_name[3][25] = { "NOT_RUNNING" , "RUNNING", "RUNNING_BEING_CONFIGURED" }; 00838 if(running_state() == _running_state) 00839 TEST_FOR_EXCEPTION( 00840 true, InvalidRunningState 00841 ,"Algorithm::validate_not_in_state(...) : The condition running_state() != " 00842 << running_state_name[running_state()] << " has been violated" ); 00843 } 00844 00845 void Algorithm::validate_not_curr_step(poss_type step_poss) const { 00846 if(step_poss == curr_step_poss_) 00847 TEST_FOR_EXCEPTION( 00848 true, InvalidConfigChange 00849 ,"Algorithm::validate_not_curr_step(step_poss="<<step_poss<<") : " 00850 "Error, You can not modify the step being currently executed" ); 00851 } 00852 00853 void Algorithm::validate_not_next_step(const std::string& step_name) const { 00854 if( step_name == saved_next_step_name_ ) 00855 TEST_FOR_EXCEPTION( 00856 true, InvalidConfigChange, 00857 "Algorithm::validate_not_next_step(step_name): " 00858 "Error, You can not modify name or remove the step given by " 00859 "step_name = what_is_next_name() = " << step_name ); 00860 } 00861 00862 Algorithm::steps_t::iterator Algorithm::step_itr_and_assert(const std::string& step_name) 00863 { 00864 steps_t::iterator itr = step_itr(step_name); 00865 if(itr == steps_.end()) 00866 TEST_FOR_EXCEPTION( 00867 true, DoesNotExist 00868 ,"Algorithm::step_itr(...) : A step with the name " 00869 << step_name << " does not exist." ); 00870 return itr; 00871 } 00872 00873 Algorithm::steps_t::const_iterator Algorithm::step_itr_and_assert(const std::string& step_name) const 00874 { 00875 steps_t::const_iterator itr = step_itr(step_name); 00876 if(itr == steps_.end()) 00877 TEST_FOR_EXCEPTION( 00878 true, DoesNotExist 00879 ,"Algorithm::step_itr(...) : A step with the name " 00880 << step_name << " does not exist." ); 00881 return itr; 00882 } 00883 00884 bool Algorithm::imp_do_step(poss_type step_poss) { 00885 curr_step_poss_ = step_poss; 00886 // do the pre steps in order 00887 if( !imp_do_assoc_steps(PRE_STEP) ) return false; 00888 // do the main step 00889 if( !steps_[curr_step_poss_-1].step_ptr->do_step(*this, curr_step_poss_, DO_MAIN_STEP, 0) ) return false; 00890 // do the post steps in order 00891 if( !imp_do_assoc_steps(POST_STEP) ) return false; 00892 // if you get here all the pre steps, step, and post steps returned true. 00893 if( static_interrupt_status == NOT_INTERRUPTED ) 00894 look_for_interrupt(); 00895 if( static_interrupt_status == STOP_END_STEP ) { 00896 terminate( static_interrupt_terminate_return ); 00897 return false; 00898 } 00899 return true; 00900 } 00901 00902 bool Algorithm::imp_do_assoc_steps(EAssocStepType type) { 00903 assoc_steps_ele_list_t *assoc_list = &assoc_steps_[curr_step_poss_ - 1][type]; 00904 assoc_steps_ele_list_t::iterator itr = assoc_list->begin(); 00905 int n = assoc_list->size(); 00906 for(int i = 1; i <= n; ++itr, ++i) { 00907 if(reconfigured_) { 00908 // The associated step just has reconfigured *this 00909 // so we must update our pointers and iterators. 00910 // Since it is not allowed for this step or its associated steps 00911 // to have been changed, the next associated step to 00912 // execute will not change. 00913 assoc_list = &assoc_steps_[curr_step_poss_ - 1][type]; 00914 itr = assoc_list->begin(); 00915 std::advance( itr, i - 1 ); 00916 reconfigured_ = false; // This works as long as no one else needs to know 00917 // if *this has been reconfigured. 00918 } 00919 if( !(*(*itr).step_ptr).do_step(*this, curr_step_poss_, do_step_type(type), i) ) return false; 00920 } 00921 return true; // All the associated steps returned true. 00922 } 00923 00924 void Algorithm::imp_inform_steps(inform_func_ptr_t inform_func_ptr) 00925 { 00926 steps_t::const_iterator s_itr = steps_.begin(); 00927 assoc_steps_t::const_iterator a_itr = assoc_steps_.begin(); 00928 poss_type step_i = 1; 00929 for(; step_i <= static_cast<poss_type>(num_steps()); ++step_i, ++s_itr, ++a_itr) { 00930 // pre_steps (e.q. 2.-3, 2.-2, 2.-1) 00931 const assoc_steps_ele_list_t &pre_steps = (*a_itr)[PRE_STEP]; 00932 assoc_steps_ele_list_t::const_iterator pre_step_itr = pre_steps.begin(); 00933 for(int pre_step_i = - pre_steps.size(); pre_step_i < 0; ++pre_step_i, ++pre_step_itr) { 00934 ((&*(*pre_step_itr).step_ptr)->*inform_func_ptr)( 00935 *this, step_i, DO_PRE_STEP, pre_steps.size()+pre_step_i+1 00936 ); 00937 } 00938 // The main step. 00939 ((&*(*s_itr).step_ptr)->*inform_func_ptr)( *this, step_i, DO_MAIN_STEP, 0 ); 00940 // post_steps (e.q. 2.1, 2.2, 2.3) 00941 const assoc_steps_ele_list_t &post_steps = (*a_itr)[POST_STEP]; 00942 assoc_steps_ele_list_t::const_iterator post_step_itr = post_steps.begin(); 00943 for(int post_step_i = 1; post_step_i <= static_cast<int>(post_steps.size()); ++post_step_i, ++post_step_itr) { 00944 ((&*(*post_step_itr).step_ptr)->*inform_func_ptr)( 00945 *this, step_i, DO_POST_STEP, post_step_i 00946 ); 00947 } 00948 } 00949 } 00950 00951 void Algorithm::imp_print_algorithm(std::ostream& out, bool print_steps) const 00952 { 00953 using Teuchos::typeName; 00954 const std::string leading_str = " "; 00955 00956 steps_t::const_iterator s_itr = steps_.begin(); 00957 assoc_steps_t::const_iterator a_itr = assoc_steps_.begin(); 00958 poss_type step_i = 1; 00959 for(; step_i <= static_cast<poss_type>(num_steps()); ++step_i, ++s_itr, ++a_itr) { 00960 // list pre_steps (e.q. 2.-3, 2.-2, 2.-1) 00961 const assoc_steps_ele_list_t &pre_steps = (*a_itr)[PRE_STEP]; 00962 assoc_steps_ele_list_t::const_iterator pre_step_itr = pre_steps.begin(); 00963 for(int pre_step_i = - pre_steps.size(); pre_step_i < 0; ++pre_step_i, ++pre_step_itr) { 00964 out << step_i << "." << pre_step_i << ". \"" 00965 << (*pre_step_itr).name << "\"\n" 00966 << leading_str << "(" << typeName(*(*pre_step_itr).step_ptr) << ")\n"; 00967 if(print_steps) { 00968 (*(*pre_step_itr).step_ptr).print_step( *this, step_i, DO_PRE_STEP 00969 , pre_steps.size()+pre_step_i+1, out, leading_str ); 00970 out << std::endl; 00971 } 00972 } 00973 // The main step. 00974 out << step_i << ". \"" << (*s_itr).name 00975 << "\"\n" 00976 << leading_str << "(" << typeName(*(*s_itr).step_ptr) << ")\n"; 00977 if(print_steps) { 00978 (*(*s_itr).step_ptr).print_step( *this, step_i, DO_MAIN_STEP, 0, out, leading_str ); 00979 out << std::endl; 00980 } 00981 // list post_steps (e.q. 2.1, 2.2, 2.3) 00982 const assoc_steps_ele_list_t &post_steps = (*a_itr)[POST_STEP]; 00983 assoc_steps_ele_list_t::const_iterator post_step_itr = post_steps.begin(); 00984 for(int post_step_i = 1; post_step_i <= static_cast<poss_type>(post_steps.size()); ++post_step_i, ++post_step_itr) { 00985 out << step_i << "." << post_step_i << ". \"" 00986 << (*post_step_itr).name << "\"\n" 00987 << leading_str << "(" << typeName(*(*post_step_itr).step_ptr) << ")\n"; 00988 if(print_steps) { 00989 (*(*post_step_itr).step_ptr).print_step( *this, step_i, DO_POST_STEP, post_step_i 00990 , out, leading_str ); 00991 out << std::endl; 00992 } 00993 } 00994 } 00995 if(print_steps) { 00996 out 00997 << step_i << ". \"Major Loop\" :\n" 00998 << " if k >= max_iter then\n" 00999 << " terminate the algorithm\n" 01000 << " elseif run_time() >= max_run_time then\n" 01001 << " terminate the algorithm\n" 01002 << " else\n" 01003 << " k = k + 1\n" 01004 << " goto 1\n" 01005 << " end\n"; 01006 } 01007 } 01008 01009 // validate poss 01010 01011 Algorithm::poss_type Algorithm::validate(poss_type step_poss, int past_end) const 01012 { 01013 01014 TEST_FOR_EXCEPTION( 01015 step_poss < 1 || steps_.size() + past_end < step_poss, DoesNotExist 01016 ,"Algorithm::validate(step_poss) : The step_poss = " << step_poss 01017 << " is not in range of 1 to " << steps_.size() + past_end ); 01018 return step_poss; 01019 } 01020 01021 Algorithm::poss_type Algorithm::validate(const assoc_steps_ele_list_t& assoc_list 01022 , poss_type assoc_step_poss, int past_end) const 01023 { 01024 TEST_FOR_EXCEPTION( 01025 assoc_step_poss < 1 || assoc_list.size() + past_end < assoc_step_poss, DoesNotExist 01026 ,"Algorithm::validate(assoc_list,assoc_step_poss) : The assoc_step_poss = " 01027 << assoc_step_poss << " is not in range of 1 to " << assoc_list.size() + past_end ); 01028 return assoc_step_poss; 01029 } 01030 01031 void Algorithm::look_for_interrupt() 01032 { 01033 // 01034 // Get the mode of aborting from the user! 01035 // 01036 if( static_interrupt_called && !static_processed_user_interrupt && static_proc_rank == 0 ) { 01037 // Allow for another interrupt possibly 01038 static_interrupt_called = false; 01039 // 01040 // Get the response from the user 01041 // 01042 enum EResponse { R_ABORT_NOW, R_CONTINUE, R_STOP_END_STEP, R_STOP_END_ITER }; 01043 EResponse response = R_ABORT_NOW; 01044 const int max_tries = 3; 01045 bool valid_response = false; 01046 for( int tries = 0; !valid_response && tries < max_tries; ++tries ) { 01047 std::cerr 01048 << "\nIterationPack::Algorithm: Received signal SIGINT." 01049 << "\nJust completed current step curr_step_name = \"" 01050 << get_step_name(curr_step_poss_) << "\", curr_step_poss = " 01051 << curr_step_poss_ << " of steps [1..." << num_steps() << "]." 01052 << "\nDo you want to:\n" 01053 << " (a) Abort the program immediately?\n" 01054 << " (c) Continue with the algorithm?\n" 01055 << " (s) Gracefully terminate the algorithm at the end of this step?\n" 01056 << " (i) Gracefully terminate the algorithm at the end of this iteration?\n" 01057 << "Answer a, c, s or i ? "; 01058 char abort_mode = 'a'; 01059 std::cin >> abort_mode; 01060 if( abort_mode == 'a' ) { 01061 response = R_ABORT_NOW; 01062 valid_response = true; 01063 } 01064 else if( abort_mode == 'c' ) { 01065 response = R_CONTINUE; 01066 valid_response = true; 01067 } 01068 else if( abort_mode == 's' || abort_mode == 'i' ) { 01069 if( abort_mode == 's') 01070 response = R_STOP_END_STEP; 01071 else 01072 response = R_STOP_END_ITER; 01073 std::cerr 01074 << "\nTerminate the algorithm with true (t) or false (f) ? "; 01075 std::cin >> abort_mode; 01076 if( abort_mode == 't' ) { 01077 static_interrupt_terminate_return = true; 01078 valid_response = true; 01079 } 01080 else if( abort_mode == 'f' ) { 01081 static_interrupt_terminate_return = false; 01082 valid_response = true; 01083 } 01084 else { 01085 std::cerr << "Invalid response! Expecting \'t\' or \'f\'\n"; 01086 } 01087 } 01088 else { 01089 std::cerr << "\nInvalid response! Expecting \'a\', \'c\', \'s\' or \'i\'\n"; 01090 } 01091 std::cerr << std::endl; 01092 } 01093 if(!valid_response) { 01094 std::cerr << "Three strikes, you are out!\n"; 01095 } 01096 // 01097 // Interpret the response 01098 // 01099 switch(response) { 01100 case R_ABORT_NOW: { 01101 static_interrupt_status = ABORT_PROGRAM; 01102 break; 01103 } 01104 case R_CONTINUE: { 01105 static_interrupt_status = NOT_INTERRUPTED; 01106 break; 01107 } 01108 case R_STOP_END_STEP: { 01109 static_interrupt_status = STOP_END_STEP; 01110 break; 01111 } 01112 case R_STOP_END_ITER: { 01113 static_interrupt_status = STOP_END_ITER; 01114 break; 01115 } 01116 default: { 01117 TEST_FOR_EXCEPT(true); 01118 } 01119 } 01120 static_processed_user_interrupt = true; 01121 } 01122 else if( interrupt_file_name().length() && !static_processed_user_interrupt && static_proc_rank == 0 ) { 01123 // 01124 // If there was not an interactive interrupt then look for an 01125 // interrupt file if we have not already done this 01126 // (static_processed_user_interrupt). 01127 // 01128 std::ifstream interrupt_file(interrupt_file_name().c_str()); 01129 if(interrupt_file) { 01130 std::cerr 01131 << "\nIterationPack::Algorithm: Found the interrupt file \""<<interrupt_file_name()<<"\"!" 01132 << "\nJust completed current step curr_step_name = \"" 01133 << get_step_name(curr_step_poss_) << "\", curr_step_poss = " 01134 << curr_step_poss_ << " of steps [1..." << num_steps() << "].\n"; 01135 char abort_mode = 0; 01136 interrupt_file >> abort_mode; 01137 std::cerr << "Read a value of abort_mode = \'"<<abort_mode<<"\': "; 01138 if( abort_mode == 'a' ) { 01139 std::cerr << "Will abort the program immediatly!\n"; 01140 static_interrupt_status = ABORT_PROGRAM; 01141 } 01142 else if( abort_mode == 's' || abort_mode == 'i' ) { 01143 if( abort_mode == 's') { 01144 std::cerr << "Will abort the program gracefully at the end of this step!\n"; 01145 static_interrupt_status = STOP_END_STEP; 01146 } 01147 else { 01148 std::cerr << "Will abort the program gracefully at the end of this iteration!\n"; 01149 static_interrupt_status = STOP_END_ITER; 01150 } 01151 TEST_FOR_EXCEPTION( 01152 interrupt_file.eof(), std::logic_error, 01153 "IterationPack::Algorithm: Error, expected input for terminate_bool option from the " 01154 "file \""<<interrupt_file_name()<<"\"!" 01155 ); 01156 char terminate_bool = 0; 01157 interrupt_file >> terminate_bool; 01158 std::cerr << "Read a value of terminate_bool = \'"<<terminate_bool<<"\': "; 01159 if( terminate_bool == 't' ) { 01160 std::cerr << "Will return a success flag!\n"; 01161 static_interrupt_terminate_return = true; 01162 } 01163 else if( terminate_bool == 'f' ) { 01164 std::cerr << "Will return a failure flag!\n"; 01165 static_interrupt_terminate_return = false; 01166 } 01167 else { 01168 TEST_FOR_EXCEPTION( 01169 true, std::logic_error 01170 ,"Error, the value of terminate_bool = \'"<<terminate_bool<<"\' is not " 01171 "valid! Valid values include only \'t\' or \'f\'\n" 01172 ); 01173 } 01174 } 01175 else { 01176 TEST_FOR_EXCEPTION( 01177 true, std::logic_error 01178 ,"Error, the value of abort_mode = \'"<<abort_mode<<"\' is not " 01179 "valid! Valid values include only \'a\', \'s\' or \'i\'\n" 01180 ); 01181 } 01182 std::cerr << std::endl; 01183 static_processed_user_interrupt = true; 01184 } 01185 } 01186 // 01187 // Make sure that all of the processes get the same 01188 // response 01189 // 01190 #ifdef HAVE_MPI 01191 const bool query_for_interrupt = true; // ToDo: Make this an external option! 01192 if( static_num_proc > 1 && query_for_interrupt ) { 01193 // 01194 // Here we will do a global reduction to see of a processor has 01195 // recieved an interrupt. Here we will do a sum operation since only the 01196 // root process should be getting these options. 01197 // 01198 int sendbuf[2] = { 0, 0 }; 01199 int recvbuf[2] = { 0, 0 }; 01200 if(static_proc_rank == 0) { 01201 sendbuf[0] = (int)static_interrupt_status; 01202 sendbuf[1] = static_interrupt_terminate_return ? 1 : 0; 01203 } 01204 // Note: this global reduction will synchronize all of the processors! 01205 #ifdef ITERATION_PACK_ALGORITHM_SHOW_MPI_DEBUG_INFO 01206 std::cerr << "\np="<<static_proc_rank<<": IterationPack::Algorithm::interrupt(): Calling MPI_Allreduce(...) ...\n"; 01207 #endif 01208 MPI_Allreduce( 01209 sendbuf // sendbuf 01210 ,recvbuf // recvbuf 01211 ,2 // count 01212 ,MPI_INT // datatype 01213 ,MPI_SUM // op 01214 ,MPI_COMM_WORLD // comm (ToDo: Make more general?) 01215 ); 01216 #ifdef ITERATION_PACK_ALGORITHM_SHOW_MPI_DEBUG_INFO 01217 std::cerr 01218 << "\np="<<static_proc_rank<<": IterationPack::Algorithm::interrupt(): After MPI_Allreduce(...)" 01219 << "\np="<<static_proc_rank<<": recvbuf[0] = " << recvbuf[0] << ", recvbuf[1] = " << recvbuf[1] << std::endl; 01220 #endif 01221 // Set static_interrupt_status 01222 switch( (EInterruptStatus)recvbuf[0] ) { 01223 case NOT_INTERRUPTED: 01224 static_interrupt_status = NOT_INTERRUPTED; 01225 break; 01226 case STOP_END_STEP: 01227 static_interrupt_status = STOP_END_STEP; 01228 break; 01229 case STOP_END_ITER: 01230 static_interrupt_status = STOP_END_ITER; 01231 break; 01232 case ABORT_PROGRAM: 01233 static_interrupt_status = ABORT_PROGRAM; 01234 break; 01235 default: 01236 std::cerr 01237 << "p=" << static_proc_rank << ": Algorithm::look_for_interrupt(): Error, the globally reduced value of " 01238 "recvbuf[0] = " << recvbuf[0] << " is not valid!"; 01239 std::abort(); 01240 } 01241 // Set static_interrupt_terminate_return 01242 static_interrupt_terminate_return = ( recvbuf[1] == 0 ? false : true ); 01243 } 01244 // 01245 // Abort the program now if the user did not already press Ctrl-C again! 01246 // 01247 if( static_interrupt_status == ABORT_PROGRAM ) { 01248 if( static_proc_rank == 0 ) { 01249 std::cerr << "\nAborting the program now!\n"; 01250 } 01251 std::abort(); 01252 } 01253 #endif 01254 } 01255 01256 // static 01257 01258 void Algorithm::interrupt() 01259 { 01260 // 01261 // This function assumes that every process will recieve the same 01262 // signal which I found to be the case with MPICH. I am not clear 01263 // what the MPI standard says about interrupts so I can not 01264 // guarantee that this is 100% portable. If other behavior is 01265 // needed, this will have to be compiled in differently. 01266 // 01267 // Note: I have found that on MPICH that you can not guarantee that 01268 // only a single signal will be sent to a slave process so this 01269 // function will ignore interupts for slave processes. 01270 // 01271 // Note that you have to be very careful what you do inside of a 01272 // signal handler and in general you should only be setting flags or 01273 // aborting. 01274 // 01275 static_processed_user_interrupt = false; 01276 #ifdef ITERATION_PACK_ALGORITHM_SHOW_MPI_DEBUG_INFO 01277 std::cerr << "\np="<<static_proc_rank<<": IterationPack::Algorithm::interrupt() called!\n"; 01278 #endif 01279 // 01280 // See if an algorithm is possibly even running yet! 01281 // 01282 if( static_num_proc == 0 ) { 01283 if( static_proc_rank == 0 ) 01284 std::cerr 01285 << "\nIterationPack::Algorithm::interrupt(): Received signal SIGINT but an Algorithm " 01286 << "object has not been allocated yet and no algorithm is running.\n" 01287 << "\nAborting the program now!\n"; 01288 std::abort(); 01289 return; // Should not be called! 01290 } 01291 // 01292 // See if we are going to query for an interrupt when running in MPI mode 01293 // 01294 const bool query_for_interrupt = true; // ToDo: Make this an external option! 01295 if( !query_for_interrupt && static_num_proc > 1 ) { 01296 if( static_proc_rank == 0 ) { 01297 std::cerr 01298 << "\nIterationPack::Algorithm::interrupt(): Received signal SIGINT but num_proc = " 01299 << static_num_proc << " > 1 and query_for_interrupt = false so:\n" 01300 << "\nAborting the program now!\n"; 01301 } 01302 std::abort(); 01303 return; // Should not be called! 01304 } 01305 // 01306 // Remember that this interrupt has been called! 01307 // 01308 if( static_proc_rank == 0 ) { 01309 std::cerr 01310 << "\nIterationPack::Algorithm::interrupt(): Received signal SIGINT. " 01311 << "Wait for the end of the current step and respond to an interactive query, " 01312 << "kill the process by sending another signal (i.e. SIGKILL).\n"; 01313 } 01314 static_interrupt_called = true; 01315 } 01316 01317 } // end namespace IterationPack
1.7.4