|
RTOpPack: Extra C/C++ Code for Vector Reduction/Transformation Operators Version of the Day
|
00001 /* 00002 // @HEADER 00003 // *********************************************************************** 00004 // 00005 // Moocho: Multi-functional Object-Oriented arCHitecture for Optimization 00006 // Copyright (2003) Sandia Corporation 00007 // 00008 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive 00009 // license for use of this work by or on behalf of the U.S. Government. 00010 // 00011 // This library is free software; you can redistribute it and/or modify 00012 // it under the terms of the GNU Lesser General Public License as 00013 // published by the Free Software Foundation; either version 2.1 of the 00014 // License, or (at your option) any later version. 00015 // 00016 // This library is distributed in the hope that it will be useful, but 00017 // WITHOUT ANY WARRANTY; without even the implied warranty of 00018 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00019 // Lesser General Public License for more details. 00020 // 00021 // You should have received a copy of the GNU Lesser General Public 00022 // License along with this library; if not, write to the Free Software 00023 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 00024 // USA 00025 // Questions? Contact Roscoe A. Bartlett (rabartl@sandia.gov) 00026 // 00027 // *********************************************************************** 00028 // @HEADER 00029 */ 00030 00031 #include "RTOp_apply_op_mpi.h" 00032 #include "RTOp_parallel_helpers.h" 00033 #include "RTOpToMPI.h" 00034 00035 #include <stdlib.h> 00036 00037 int RTOp_apply_op_mpi( 00038 MPI_Comm comm 00039 ,RTOp_index_type global_dim_in, RTOp_index_type local_sub_dim_in, RTOp_index_type local_offset_in 00040 ,const int num_cols 00041 ,const int num_vecs, const RTOp_value_type* l_vec_ptrs[], const ptrdiff_t l_vec_strides[], const ptrdiff_t l_vec_leading_dim[] 00042 ,const int num_targ_vecs, RTOp_value_type* l_targ_vec_ptrs[], const ptrdiff_t l_targ_vec_strides[], const ptrdiff_t l_targ_vec_leading_dim[] 00043 ,const RTOp_index_type first_ele_in, const RTOp_index_type sub_dim_in, const RTOp_index_type global_offset_in 00044 ,const struct RTOp_RTOp* op 00045 ,RTOp_ReductTarget reduct_objs[] 00046 ) 00047 { 00048 int err = 0; 00049 struct RTOp_SubVector *local_vecs = NULL; 00050 struct RTOp_MutableSubVector *local_targ_vecs = NULL; 00051 RTOp_index_type overlap_first_local_ele = 0; 00052 RTOp_index_type overalap_local_sub_dim = 0; 00053 RTOp_index_type overlap_global_offset = 0; 00054 int k; 00055 int kc; 00056 /* Validate the input */ 00057 #ifdef RTOp_DEBUG 00058 assert( num_vecs || num_targ_vecs ); 00059 if(num_vecs) 00060 assert( l_vec_ptrs != NULL ); 00061 if(num_targ_vecs) 00062 assert( l_targ_vec_ptrs != NULL ); 00063 assert( 0 <= sub_dim_in && sub_dim_in <= global_dim_in ); 00064 #endif 00065 /* Pre-initialize the local sub-vectors */ 00066 if(num_vecs) { 00067 local_vecs = malloc( sizeof(struct RTOp_SubVector) * num_vecs * num_cols ); 00068 for( kc = 0; kc < num_cols; ++kc ) { 00069 for( k = 0; k < num_vecs; ++k ) 00070 RTOp_sub_vector_null(&local_vecs[kc*num_cols+k]); 00071 } 00072 } 00073 if(num_targ_vecs) { 00074 local_targ_vecs = malloc( sizeof(struct RTOp_MutableSubVector) * num_targ_vecs ); 00075 for( kc = 0; kc < num_cols; ++kc ) { 00076 for( k = 0; k < num_targ_vecs; ++k ) 00077 RTOp_mutable_sub_vector_null(&local_targ_vecs[kc*num_cols+k]); 00078 } 00079 } 00080 /* Get the overlap in the current process with the input logical sub-vector */ 00081 /* from (first_ele_in,sub_dim_in,global_offset_in) */ 00082 RTOp_parallel_calc_overlap( 00083 global_dim_in, local_sub_dim_in, local_offset_in, first_ele_in, sub_dim_in, global_offset_in 00084 ,&overlap_first_local_ele, &overalap_local_sub_dim, &overlap_global_offset 00085 ); 00086 if( overlap_first_local_ele != 0 ) { 00087 /* Sub-vector structs for the local elements that are to participate in the */ 00088 /* reduction/transforamtion operation. */ 00089 for( kc = 0; kc < num_cols; ++kc ) { 00090 for(k = 0; k < num_vecs; ++k) { 00091 RTOp_sub_vector( 00092 overlap_global_offset /* global_offset */ 00093 ,overalap_local_sub_dim /* sub_dim */ 00094 ,l_vec_ptrs[k]+(overlap_first_local_ele-1)*l_vec_strides[k] 00095 + ( num_cols > 1 ? kc*l_vec_leading_dim[k] : 0 ) /* values */ 00096 ,l_vec_strides[k] /* values_stride */ 00097 ,&local_vecs[kc*num_cols+k] 00098 ); 00099 } 00100 for(k = 0; k < num_targ_vecs; ++k) { 00101 RTOp_mutable_sub_vector( 00102 overlap_global_offset /* global_offset */ 00103 ,overalap_local_sub_dim /* sub_dim */ 00104 ,l_targ_vec_ptrs[k]+(overlap_first_local_ele-1)*l_targ_vec_strides[k] 00105 + ( num_cols > 1 ? kc*l_targ_vec_leading_dim[k] : 0 ) /* values */ 00106 ,l_targ_vec_strides[k] /* values_stride */ 00107 ,&local_targ_vecs[kc*num_cols+k] 00108 ); 00109 } 00110 } 00111 } 00112 /* */ 00113 /* Apply the reduction operation over the sub-vectors in */ 00114 /* this process then collect the reductions over */ 00115 /* all the processes and return the result */ 00116 /* to all the processes (including this one of course). */ 00117 /* If all of the sub-svectors are empty then this will */ 00118 /* just call the reduction operation with NULL sub-vectors */ 00119 /* */ 00120 err = RTOp_MPI_apply_op( 00121 comm, op, -1 /* MPI_Allreduce(...) */ 00122 ,num_cols 00123 ,num_vecs, num_vecs && overlap_first_local_ele ? &local_vecs[0] : NULL 00124 ,num_targ_vecs, num_targ_vecs && overlap_first_local_ele ? &local_targ_vecs[0] : NULL 00125 ,reduct_objs 00126 ); 00127 00128 if(local_vecs) free(local_vecs); 00129 if(local_targ_vecs) free(local_targ_vecs); 00130 00131 /* Deallocate memory */ 00132 00133 return err; 00134 }
1.7.4