|
Sierra Toolkit Version of the Day
|
00001 /*------------------------------------------------------------------------*/ 00002 /* stk : Parallel Heterogneous Dynamic unstructured Mesh */ 00003 /* Copyright (2007) Sandia Corporation */ 00004 /* */ 00005 /* Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive */ 00006 /* license for use of this work by or on behalf of the U.S. Government. */ 00007 /* */ 00008 /* This library is free software; you can redistribute it and/or modify */ 00009 /* it under the terms of the GNU Lesser General Public License as */ 00010 /* published by the Free Software Foundation; either version 2.1 of the */ 00011 /* License, or (at your option) any later version. */ 00012 /* */ 00013 /* This library is distributed in the hope that it will be useful, */ 00014 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ 00015 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */ 00016 /* Lesser General Public License for more details. */ 00017 /* */ 00018 /* You should have received a copy of the GNU Lesser General Public */ 00019 /* License along with this library; if not, write to the Free Software */ 00020 /* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 */ 00021 /* USA */ 00022 /*------------------------------------------------------------------------*/ 00029 #include <iostream> 00030 #include <map> 00031 #include <set> 00032 #include <list> 00033 #include <sstream> 00034 #include <algorithm> 00035 #include <stdexcept> 00036 00037 #include <TPI.h> 00038 00039 #include <stk_search/SearchTypes.hpp> 00040 #include <stk_util/parallel/Parallel.hpp> 00041 #include <stk_util/parallel/ParallelComm.hpp> 00042 #include <stk_util/parallel/ParallelReduce.hpp> 00043 #include <stk_search/OctTreeOps.hpp> 00044 00045 namespace stk { 00046 namespace search { 00047 namespace { 00048 00049 inline unsigned int log2(unsigned int x) 00050 { 00051 unsigned int l=0; 00052 if(x >= 1<<16) { x>>=16; l|=16; } 00053 if(x >= 1<< 8) { x>>= 8; l|= 8; } 00054 if(x >= 1<< 4) { x>>= 4; l|= 4; } 00055 if(x >= 1<< 2) { x>>= 2; l|= 2; } 00056 if(x >= 1<< 1) { l|= 1; } 00057 return l; 00058 } 00059 00060 } 00061 00062 00063 //---------------------------------------------------------------------- 00064 //---------------------------------------------------------------------- 00065 00066 bool hsfc_box_covering( const float * const global_box , 00067 const float * const small_box , 00068 stk::OctTreeKey * const covering , 00069 unsigned & number, 00070 double scale) 00071 { 00072 enum { Dimension = 3 }; 00073 enum { Combinations = 8 }; 00074 00075 const double min = std::numeric_limits<float>::epsilon(); 00076 const double max = 1.0 - min ; 00077 00078 // Determine the unit-box bounds and bisection depth for the box 00079 00080 double ubox_low[ Dimension ] ; 00081 double ubox_up[ Dimension ] ; 00082 00083 bool valid = true ; 00084 00085 // Determine unit box and is maximum length 00086 // The box is bounded by [eps,1-eps]. 00087 00088 double unit_size = 0.0 ; 00089 00090 for ( unsigned i = 0 ; i < Dimension ; ++i ) { 00091 00092 const float global_low = global_box[i] ; 00093 const float global_up = global_box[i+Dimension] ; 00094 const float small_low = small_box[i] ; 00095 const float small_up = small_box[i+Dimension] ; 00096 00097 if ( small_up < global_low ) { 00098 // Entirely less than 'min' 00099 ubox_low[i] = ubox_up[i] = min ; 00100 valid = false ; 00101 } 00102 else if ( global_up < small_low ) { 00103 // Entirely greater than 'max' 00104 ubox_low[i] = ubox_up[i] = max ; 00105 valid = false ; 00106 } 00107 else { 00108 double unit_low = ( small_low - global_low ) * scale ; 00109 double unit_up = ( small_up - global_low ) * scale ; 00110 00111 if ( unit_low < min ) { 00112 unit_low = min ; 00113 valid = false ; 00114 } 00115 00116 if ( max < unit_up ) { 00117 unit_up = max ; 00118 valid = false ; 00119 } 00120 00121 if ( unit_up < unit_low ) { 00122 // A negative volume, consider it a point at the lower 00123 unit_up = unit_low ; 00124 valid = false ; 00125 } 00126 else { 00127 const double tmp_size = unit_up - unit_low ; 00128 if ( unit_size < tmp_size ) { unit_size = tmp_size ; } 00129 } 00130 00131 ubox_low[i] = unit_low ; 00132 ubox_up[i] = unit_up ; 00133 } 00134 } 00135 00136 // Depth is determined by smallest cell depth 00137 // that could contain the small_box 00138 00139 unsigned depth = stk::OctTreeKey::MaxDepth ; 00140 00141 if ( 0 < unit_size ) { 00142 const unsigned size_inv = static_cast<unsigned>(1.0 / unit_size); 00143 depth = log2(size_inv); 00144 if (depth > stk::OctTreeKey::MaxDepth) depth = stk::OctTreeKey::MaxDepth; 00145 } 00146 00147 // Determine the oct-tree nodes for each key 00148 00149 const unsigned shift = stk::OctTreeKey::BitsPerWord - depth ; 00150 const unsigned num_cell = 1 << depth ; 00151 00152 // At most two cells in each axis at this depth 00153 00154 unsigned coord_low[ Dimension ]; 00155 unsigned coord_up[ Dimension ]; 00156 00157 for ( unsigned i = 0 ; i < Dimension ; ++i ) { 00158 const unsigned low = static_cast<unsigned>( ubox_low[i] * num_cell ); 00159 const unsigned up = static_cast<unsigned>( ubox_up[i] * num_cell ); 00160 00161 if ( low + 1 < up ) { 00162 std::string msg("stk::hsfc_box_covering FAILED : depth determination logic error"); 00163 throw std::logic_error( msg ); 00164 } 00165 00166 coord_low[i] = low << shift ; 00167 coord_up[i] = up << shift ; 00168 } 00169 00170 unsigned n = 0 ; 00171 00172 // Combination 0. No duplicate possible, so pull out of loop. 00173 covering[n] = hsfc3d( depth , coord_low ); 00174 ++n ; 00175 00176 for ( unsigned i = 1 ; i < Combinations ; ++i ) { 00177 00178 const bool duplicate = 00179 ( ( i & 01 ) && coord_up[0] == coord_low[0] ) || 00180 ( ( i & 02 ) && coord_up[1] == coord_low[1] ) || 00181 ( ( i & 04 ) && coord_up[2] == coord_low[2] ) ; 00182 00183 if ( ! duplicate ) { 00184 unsigned coord[3] ; 00185 00186 coord[0] = ( i & 01 ) ? coord_up[0] : coord_low[0] ; 00187 coord[1] = ( i & 02 ) ? coord_up[1] : coord_low[1] ; 00188 coord[2] = ( i & 04 ) ? coord_up[2] : coord_low[2] ; 00189 00190 covering[n] = hsfc3d( depth , coord ); 00191 00192 ++n ; 00193 } 00194 } 00195 00196 number = n ; 00197 00198 return valid ; 00199 } 00200 00201 00202 namespace { 00203 00204 //------------------------------------------//---------------------------------------------------------------------- 00205 // Reset the accumulated node weights to only include 00206 // those nodes in the range [ k_first , k_last ] 00207 00208 void accumulate_weights( 00209 const stk::OctTreeKey &k_node_p , 00210 const stk::OctTreeKey &k_first_p , 00211 const unsigned ord_end , 00212 const unsigned depth , 00213 float * const weights ) 00214 { 00215 stk::OctTreeKey k_node (k_node_p); 00216 stk::OctTreeKey k_first(k_first_p); 00217 const unsigned ord_node_2 = 2 * oct_tree_offset( depth , k_node ); 00218 00219 if ( k_node.depth() < depth ) { 00220 00221 double w = 0 ; 00222 00223 const unsigned d1 = k_node.depth() + 1 ; 00224 00225 unsigned i = k_first.index( d1 ); 00226 00227 if ( i ) { 00228 k_node.set_index( d1 , i ); 00229 00230 const unsigned ord = oct_tree_offset( depth , k_node ); 00231 const unsigned ord_2 = ord * 2 ; 00232 00233 accumulate_weights( k_node , k_first , ord_end , depth , weights ); 00234 00235 // Counts of this node and all of its descending nodes 00236 w += weights[ord_2] + weights[ ord_2 + 1 ] ; 00237 00238 k_first = stk::OctTreeKey(); // Done with the lower bound 00239 } 00240 00241 for ( ++i ; i <= 8 ; ++i ) { 00242 00243 k_node.set_index( d1 , i ); 00244 00245 const unsigned ord = oct_tree_offset( depth , k_node ); 00246 const unsigned ord_2 = ord * 2 ; 00247 00248 if ( ord < ord_end ) { 00249 accumulate_weights( k_node, k_first , ord_end , depth , weights ); 00250 00251 // Counts of this node and all of its descending nodes 00252 w += weights[ord_2] + weights[ ord_2 + 1 ] ; 00253 } 00254 } 00255 00256 // Descending node weight 00257 00258 weights[ ord_node_2 + 1 ] = static_cast<float>(w); 00259 } 00260 } 00261 00262 //---------------------------------------------------------------------- 00263 00264 void oct_key_split( 00265 const stk::OctTreeKey & key , 00266 const unsigned upper_ord , 00267 stk::OctTreeKey & key_upper ) 00268 { 00269 // Split key at key.depth() + 1 00270 00271 unsigned d = key.depth(); 00272 00273 key_upper = key ; 00274 00275 if ( upper_ord == 1 ) { // key_upper gets it all 00276 while ( d && 1 == key_upper.index(d) ) { 00277 key_upper.clear_index(d); 00278 --d ; 00279 } 00280 } 00281 else if ( 8 < upper_ord ) { // key_upper get none of it, Increment key_upper 00282 00283 unsigned i = 0 ; 00284 while ( d && 8 == ( i = key_upper.index(d) ) ) { 00285 key_upper.clear_index(d); 00286 --d ; 00287 } 00288 if ( d ) { key_upper.set_index( d , i + 1 ); } 00289 } 00290 else { 00291 key_upper.set_index( d + 1 , upper_ord ); 00292 } 00293 } 00294 00295 //---------------------------------------------------------------------- 00296 00297 void partition( 00298 const stk::OctTreeKey & k_first , 00299 const unsigned i_end , 00300 const stk::OctTreeKey & key , 00301 const unsigned depth , 00302 const float * weights , 00303 const double tolerance , 00304 const double target_ratio , 00305 double w_lower , 00306 double w_upper , 00307 stk::OctTreeKey & k_upper ) 00308 { 00309 const unsigned ord_node = oct_tree_offset( depth , key ); 00310 const float * const w_node = weights + ord_node * 2 ; 00311 00312 const unsigned d1 = key.depth() + 1 ; 00313 00314 // Add weights from nested nodes and their descendents 00315 // Try to achieve the ratio. 00316 00317 const unsigned i_first = k_first.index( d1 ); 00318 00319 unsigned i = ( i_first ) ? i_first : 1 ; 00320 unsigned j = 8 ; 00321 { 00322 stk::OctTreeKey k_upp = key ; 00323 k_upp.set_index( d1 , j ); 00324 while ( i_end <= oct_tree_offset( depth , k_upp ) ) { 00325 k_upp.set_index( d1 , --j ); 00326 } 00327 } 00328 00329 w_lower += w_node[0] ; 00330 w_upper += w_node[0] ; 00331 00332 // At the maximum depth? 00333 00334 if ( key.depth() == depth ) { 00335 // Assume weight from unrepresented nested nodes is 00336 // evenly distributed among the nodes in the span [i,j] 00337 00338 const unsigned n = 1 + j - i ; 00339 00340 const double val = static_cast<double>(w_node[1]) / static_cast<double>(n); 00341 00342 // val = val_lower + val_upper 00343 // ( w_lower + val_lower ) / ( w_upper + val_upper ) == target_ratio 00344 00345 const double val_lower = 00346 ( target_ratio * ( w_upper + val ) - w_lower ) / 00347 ( target_ratio + 1 ) ; 00348 00349 if ( 0 < val_lower ) { 00350 // How much of the range does the lower portion get? 00351 // Roundoff instead of merely truncating: 00352 i += static_cast<unsigned>( 0.5 + ( n * val_lower ) / val ); 00353 00354 // Can only get up to the maximum 00355 if ( j < i ) { i = j ; } 00356 } 00357 oct_key_split( key , i , k_upper ); 00358 } 00359 else { 00360 00361 // while ( i != j ) { 00362 while ( i < j ) { 00363 stk::OctTreeKey ki = key ; ki.set_index( d1 , i ); 00364 stk::OctTreeKey kj = key ; kj.set_index( d1 , j ); 00365 00366 const float * const vi = weights + 2 * oct_tree_offset( depth , ki ); 00367 const float * const vj = weights + 2 * oct_tree_offset( depth , kj ); 00368 00369 const double vali = vi[0] + vi[1] ; 00370 const double valj = vj[0] + vj[1] ; 00371 00372 if ( 0 < vali && 0 < valj ) { 00373 00374 // Choose between ( w_lower += vali ) vs. ( w_upper += valj ) 00375 // Knowing that the skipped value will be revisited. 00376 00377 if ( ( w_lower + vali ) < target_ratio * ( w_upper + valj ) ) { 00378 // Add to 'w_lower' and will still need more later 00379 w_lower += vali ; 00380 ++i ; 00381 } 00382 else { 00383 // Add to 'w_upper' and will still need more later 00384 w_upper += valj ; 00385 --j ; 00386 } 00387 } 00388 else { 00389 if ( vali <= 0.0 ) { ++i ; } 00390 if ( valj <= 0.0 ) { --j ; } 00391 } 00392 } 00393 00394 // If 'i' has not incremented then 'k_first' is still in force 00395 stk::OctTreeKey nested_k_first ; 00396 if ( i_first == i ) { nested_k_first = k_first ; } 00397 00398 // Split node nested[i] ? 00399 stk::OctTreeKey ki = key ; ki.set_index( d1 , i ); 00400 00401 const float * const vi = weights + 2 * oct_tree_offset( depth , ki ); 00402 const double vali = vi[0] + vi[1] ; 00403 00404 double diff = 0.0 ; 00405 00406 if ( vali <= 0.0 ) { 00407 diff = 0.0 ; // Nothing can be done. Give 'i' to the upper range 00408 } 00409 else if ( w_lower < w_upper * target_ratio ) { 00410 // Try adding to w_lower 00411 diff = static_cast<double> (w_lower + vali) / static_cast<double>(w_upper) - target_ratio ; 00412 ++i ; 00413 } 00414 else { 00415 // Try adding to w_upper 00416 diff = static_cast<double>(w_lower) / static_cast<double>(w_upper + vali) - target_ratio ; 00417 } 00418 00419 if ( - tolerance < diff && diff < tolerance ) { 00420 oct_key_split( key , i , k_upper ); 00421 } 00422 else { 00423 partition( nested_k_first , i_end , ki , 00424 depth , weights , 00425 tolerance , target_ratio , 00426 w_lower , w_upper , k_upper ); 00427 } 00428 } 00429 } 00430 00431 } // namespace <empty> 00432 00433 unsigned processor( const stk::OctTreeKey * const cuts_b , 00434 const stk::OctTreeKey * const cuts_e , 00435 const stk::OctTreeKey & key ) 00436 { 00437 const stk::OctTreeKey * const cuts_p = std::upper_bound( cuts_b , cuts_e , key ); 00438 00439 if ( cuts_p == cuts_b ) { 00440 std::string msg("stk::processor FAILED: Bad cut-key array"); 00441 throw std::runtime_error(msg); 00442 } 00443 00444 return ( cuts_p - cuts_b ) - 1 ; 00445 } 00446 00447 //---------------------------------------------------------------------- 00448 00449 void oct_tree_partition_private( 00450 const unsigned p_first , 00451 const unsigned p_end , 00452 const unsigned depth , 00453 const double tolerance , 00454 float * const weights , 00455 const unsigned cuts_length , 00456 stk::OctTreeKey * const cuts ) 00457 { 00458 // split tree between [ p_first , p_end ) 00459 const unsigned p_size = p_end - p_first ; 00460 const unsigned p_upper = ( p_end + p_first ) / 2 ; 00461 00462 const double target_fraction = 00463 static_cast<double> ( p_upper - p_first ) / static_cast<double>(p_size); 00464 00465 const double target_ratio = target_fraction / ( 1.0 - target_fraction ); 00466 00467 // Determine k_lower and k_upper such that 00468 // 00469 // Weight[ k_first , k_lower ] / Weight [ k_upper , k_last ] == target_ratio 00470 // 00471 // Within a tollerance 00472 00473 const stk::OctTreeKey k_first = cuts[ p_first ]; 00474 00475 const unsigned i_end = 00476 p_end < cuts_length ? oct_tree_offset( depth , cuts[ p_end ] ) 00477 : oct_tree_size( depth ); 00478 00479 // Walk the tree [ k_first , k_last ] and accumulate weight 00480 00481 accumulate_weights( stk::OctTreeKey() , k_first , i_end , depth , weights ); 00482 00483 stk::OctTreeKey k_root ; 00484 stk::OctTreeKey & k_upper = cuts[ p_upper ] ; 00485 00486 unsigned w_lower = 0 ; 00487 unsigned w_upper = 0 ; 00488 00489 partition( k_first, i_end, k_root , 00490 depth, weights, 00491 tolerance, target_ratio, 00492 w_lower, w_upper, k_upper ); 00493 00494 const bool nested_lower_split = p_first + 1 < p_upper ; 00495 const bool nested_upper_split = p_upper + 1 < p_end ; 00496 00497 // If splitting both lower and upper, and a thread is available 00498 // then one of the next two calls could be a parallel thread 00499 // with a local copy of the shared 'weights' array. 00500 00501 if ( nested_lower_split ) { 00502 oct_tree_partition_private( p_first, p_upper, depth, 00503 tolerance, weights, cuts_length, cuts ); 00504 } 00505 00506 if ( nested_upper_split ) { 00507 oct_tree_partition_private( p_upper, p_end, depth, 00508 tolerance, weights, cuts_length, cuts ); 00509 } 00510 } 00511 00512 } // namespace search 00513 } // namespace stk