|
Teuchos - Trilinos Tools Package Version of the Day
|
00001 // @HEADER 00002 // *********************************************************************** 00003 // 00004 // Teuchos: Common Tools Package 00005 // Copyright (2004) Sandia Corporation 00006 // 00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive 00008 // license for use of this work by or on behalf of the U.S. Government. 00009 // 00010 // This library is free software; you can redistribute it and/or modify 00011 // it under the terms of the GNU Lesser General Public License as 00012 // published by the Free Software Foundation; either version 2.1 of the 00013 // License, or (at your option) any later version. 00014 // 00015 // This library is distributed in the hope that it will be useful, but 00016 // WITHOUT ANY WARRANTY; without even the implied warranty of 00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00018 // Lesser General Public License for more details. 00019 // 00020 // You should have received a copy of the GNU Lesser General Public 00021 // License along with this library; if not, write to the Free Software 00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 00023 // USA 00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00025 // 00026 // *********************************************************************** 00027 // @HEADER 00028 00029 // Kris 00030 // 06.16.03 -- Start over from scratch 00031 // 06.16.03 -- Initial templatization (Tpetra_BLAS.cpp is no longer needed) 00032 // 06.18.03 -- Changed xxxxx_() function calls to XXXXX_F77() 00033 // -- Added warning messages for generic calls 00034 // 07.08.03 -- Move into Teuchos package/namespace 00035 // 07.24.03 -- The first iteration of BLAS generics is nearing completion. Caveats: 00036 // * TRSM isn't finished yet; it works for one case at the moment (left side, upper tri., no transpose, no unit diag.) 00037 // * Many of the generic implementations are quite inefficient, ugly, or both. I wrote these to be easy to debug, not for efficiency or legibility. The next iteration will improve both of these aspects as much as possible. 00038 // * Very little verification of input parameters is done, save for the character-type arguments (TRANS, etc.) which is quite robust. 00039 // * All of the routines that make use of both an incx and incy parameter (which includes much of the L1 BLAS) are set up to work iff incx == incy && incx > 0. Allowing for differing/negative values of incx/incy should be relatively trivial. 00040 // * All of the L2/L3 routines assume that the entire matrix is being used (that is, if A is mxn, lda = m); they don't work on submatrices yet. This *should* be a reasonably trivial thing to fix, as well. 00041 // -- Removed warning messages for generic calls 00042 // 08.08.03 -- TRSM now works for all cases where SIDE == L and DIAG == N. DIAG == U is implemented but does not work correctly; SIDE == R is not yet implemented. 00043 // 08.14.03 -- TRSM now works for all cases and accepts (and uses) leading-dimension information. 00044 // 09.26.03 -- character input replaced with enumerated input to cause compiling errors and not run-time errors ( suggested by RAB ). 00045 00046 #ifndef _TEUCHOS_BLAS_HPP_ 00047 #define _TEUCHOS_BLAS_HPP_ 00048 00056 /* for INTEL_CXML, the second arg may need to be changed to 'one'. If so 00057 the appropriate declaration of one will need to be added back into 00058 functions that include the macro: 00059 */ 00060 #if defined (INTEL_CXML) 00061 unsigned int one=1; 00062 #endif 00063 00064 #ifdef CHAR_MACRO 00065 #undef CHAR_MACRO 00066 #endif 00067 #if defined (INTEL_CXML) 00068 #define CHAR_MACRO(char_var) &char_var, one 00069 #else 00070 #define CHAR_MACRO(char_var) &char_var 00071 #endif 00072 00073 #include "Teuchos_ConfigDefs.hpp" 00074 #include "Teuchos_ScalarTraits.hpp" 00075 #include "Teuchos_OrdinalTraits.hpp" 00076 #include "Teuchos_BLAS_types.hpp" 00077 #include "Teuchos_TestForException.hpp" 00078 00112 namespace Teuchos 00113 { 00114 extern TEUCHOS_LIB_DLL_EXPORT const char ESideChar[]; 00115 extern TEUCHOS_LIB_DLL_EXPORT const char ETranspChar[]; 00116 extern TEUCHOS_LIB_DLL_EXPORT const char EUploChar[]; 00117 extern TEUCHOS_LIB_DLL_EXPORT const char EDiagChar[]; 00118 00120 00125 template<typename OrdinalType, typename ScalarType> 00126 class DefaultBLASImpl 00127 { 00128 00129 typedef typename Teuchos::ScalarTraits<ScalarType>::magnitudeType MagnitudeType; 00130 00131 public: 00133 00134 00136 inline DefaultBLASImpl(void) {} 00137 00139 inline DefaultBLASImpl(const DefaultBLASImpl<OrdinalType, ScalarType>& /*BLAS_source*/) {} 00140 00142 inline virtual ~DefaultBLASImpl(void) {} 00144 00146 00147 00149 void ROTG(ScalarType* da, ScalarType* db, MagnitudeType* c, ScalarType* s) const; 00150 00152 void ROT(const OrdinalType n, ScalarType* dx, const OrdinalType incx, ScalarType* dy, const OrdinalType incy, MagnitudeType* c, ScalarType* s) const; 00153 00155 void SCAL(const OrdinalType n, const ScalarType alpha, ScalarType* x, const OrdinalType incx) const; 00156 00158 void COPY(const OrdinalType n, const ScalarType* x, const OrdinalType incx, ScalarType* y, const OrdinalType incy) const; 00159 00161 template <typename alpha_type, typename x_type> 00162 void AXPY(const OrdinalType n, const alpha_type alpha, const x_type* x, const OrdinalType incx, ScalarType* y, const OrdinalType incy) const; 00163 00165 typename ScalarTraits<ScalarType>::magnitudeType ASUM(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const; 00166 00168 template <typename x_type, typename y_type> 00169 ScalarType DOT(const OrdinalType n, const x_type* x, const OrdinalType incx, const y_type* y, const OrdinalType incy) const; 00170 00172 typename ScalarTraits<ScalarType>::magnitudeType NRM2(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const; 00173 00175 OrdinalType IAMAX(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const; 00176 00178 00180 00181 00183 template <typename alpha_type, typename A_type, typename x_type, typename beta_type> 00184 void GEMV(ETransp trans, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, 00185 const OrdinalType lda, const x_type* x, const OrdinalType incx, const beta_type beta, ScalarType* y, const OrdinalType incy) const; 00186 00188 template <typename A_type> 00189 void TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType n, const A_type* A, 00190 const OrdinalType lda, ScalarType* x, const OrdinalType incx) const; 00191 00193 template <typename alpha_type, typename x_type, typename y_type> 00194 void GER(const OrdinalType m, const OrdinalType n, const alpha_type alpha, const x_type* x, const OrdinalType incx, 00195 const y_type* y, const OrdinalType incy, ScalarType* A, const OrdinalType lda) const; 00197 00199 00200 00202 template <typename alpha_type, typename A_type, typename B_type, typename beta_type> 00203 void GEMM(ETransp transa, ETransp transb, const OrdinalType m, const OrdinalType n, const OrdinalType k, const alpha_type alpha, const A_type* A, const OrdinalType lda, const B_type* B, const OrdinalType ldb, const beta_type beta, ScalarType* C, const OrdinalType ldc) const; 00204 00206 template <typename alpha_type, typename A_type, typename B_type, typename beta_type> 00207 void SYMM(ESide side, EUplo uplo, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, const OrdinalType lda, const B_type* B, const OrdinalType ldb, const beta_type beta, ScalarType* C, const OrdinalType ldc) const; 00208 00210 template <typename alpha_type, typename A_type> 00211 void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, 00212 const alpha_type alpha, const A_type* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const; 00213 00215 template <typename alpha_type, typename A_type> 00216 void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, 00217 const alpha_type alpha, const A_type* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const; 00219 }; 00220 00221 template<typename OrdinalType, typename ScalarType> 00222 class TEUCHOS_LIB_DLL_EXPORT BLAS : public DefaultBLASImpl<OrdinalType,ScalarType> 00223 { 00224 00225 typedef typename Teuchos::ScalarTraits<ScalarType>::magnitudeType MagnitudeType; 00226 00227 public: 00229 00230 00232 inline BLAS(void) {} 00233 00235 inline BLAS(const BLAS<OrdinalType, ScalarType>& /*BLAS_source*/) {} 00236 00238 inline virtual ~BLAS(void) {} 00240 }; 00241 00242 //------------------------------------------------------------------------------------------ 00243 // LEVEL 1 BLAS ROUTINES 00244 //------------------------------------------------------------------------------------------ 00245 00246 template<typename OrdinalType, typename ScalarType> 00247 void DefaultBLASImpl<OrdinalType, ScalarType>::ROTG(ScalarType* da, ScalarType* db, MagnitudeType* c, ScalarType* s) const 00248 { 00249 ScalarType roe, alpha; 00250 ScalarType zero = ScalarTraits<ScalarType>::zero(); 00251 ScalarType one = ScalarTraits<ScalarType>::one(); 00252 MagnitudeType scale, norm; 00253 MagnitudeType m_one = ScalarTraits<MagnitudeType>::one(); 00254 MagnitudeType m_zero = ScalarTraits<MagnitudeType>::zero(); 00255 00256 roe = *db; 00257 if ( ScalarTraits<ScalarType>::magnitude( *da ) > ScalarTraits<ScalarType>::magnitude( *db ) ) { roe = *da; } 00258 scale = ScalarTraits<ScalarType>::magnitude( *da ) + ScalarTraits<ScalarType>::magnitude( *db ); 00259 if ( scale == m_zero ) // There is nothing to do. 00260 { 00261 *c = m_one; 00262 *s = zero; 00263 *da = zero; *db = zero; 00264 } 00265 else if ( *da == zero ) // Still nothing to do. 00266 { 00267 *c = m_zero; 00268 *s = one; 00269 *da = *db; *db = zero; 00270 } 00271 else 00272 { // Compute the Givens rotation. 00273 norm = scale*ScalarTraits<ScalarType>::magnitude(ScalarTraits<ScalarType>::squareroot( ( *da/scale)*(*da/scale) + (*db/scale)*(*db/scale) ) ); 00274 alpha = roe / ScalarTraits<ScalarType>::magnitude(roe); 00275 *c = ScalarTraits<ScalarType>::magnitude(*da) / norm; 00276 *s = alpha * ScalarTraits<ScalarType>::conjugate(*db) / norm; 00277 *db = one; 00278 if( ScalarTraits<ScalarType>::magnitude( *da ) > ScalarTraits<ScalarType>::magnitude( *db ) ){ *db = *s; } 00279 if( ScalarTraits<ScalarType>::magnitude( *db ) >= ScalarTraits<ScalarType>::magnitude( *da ) && 00280 *c != ScalarTraits<MagnitudeType>::zero() ) { *db = one / *c; } 00281 *da = norm * alpha; 00282 } 00283 } /* end ROTG */ 00284 00285 template<typename OrdinalType, typename ScalarType> 00286 void DefaultBLASImpl<OrdinalType,ScalarType>::ROT(const OrdinalType n, ScalarType* dx, const OrdinalType incx, ScalarType* dy, const OrdinalType incy, MagnitudeType* c, ScalarType* s) const 00287 { 00288 ScalarType sconj = Teuchos::ScalarTraits<ScalarType>::conjugate(*s); 00289 if (n <= 0) return; 00290 if (incx==1 && incy==1) { 00291 for(OrdinalType i=0; i<n; ++i) { 00292 ScalarType temp = *c*dx[i] + sconj*dy[i]; 00293 dy[i] = *c*dy[i] - sconj*dx[i]; 00294 dx[i] = temp; 00295 } 00296 } 00297 else { 00298 OrdinalType ix = 0, iy = 0; 00299 if (incx < 0) ix = (-n+1)*incx; 00300 if (incy < 0) iy = (-n+1)*incy; 00301 for(OrdinalType i=0; i<n; ++i) { 00302 ScalarType temp = *c*dx[ix] + sconj*dy[iy]; 00303 dy[iy] = *c*dy[iy] - sconj*dx[ix]; 00304 dx[ix] = temp; 00305 ix += incx; 00306 iy += incy; 00307 } 00308 } 00309 } 00310 00311 template<typename OrdinalType, typename ScalarType> 00312 void DefaultBLASImpl<OrdinalType, ScalarType>::SCAL(const OrdinalType n, const ScalarType alpha, ScalarType* x, const OrdinalType incx) const 00313 { 00314 OrdinalType izero = OrdinalTraits<OrdinalType>::zero(); 00315 OrdinalType ione = OrdinalTraits<OrdinalType>::one(); 00316 OrdinalType i, ix = izero; 00317 if ( n > izero ) { 00318 // Set the initial index (ix). 00319 if (incx < izero) { ix = (-n+ione)*incx; } 00320 // Scale the std::vector. 00321 for(i = izero; i < n; i++) 00322 { 00323 x[ix] *= alpha; 00324 ix += incx; 00325 } 00326 } 00327 } /* end SCAL */ 00328 00329 template<typename OrdinalType, typename ScalarType> 00330 void DefaultBLASImpl<OrdinalType, ScalarType>::COPY(const OrdinalType n, const ScalarType* x, const OrdinalType incx, ScalarType* y, const OrdinalType incy) const 00331 { 00332 OrdinalType izero = OrdinalTraits<OrdinalType>::zero(); 00333 OrdinalType ione = OrdinalTraits<OrdinalType>::one(); 00334 OrdinalType i, ix = izero, iy = izero; 00335 if ( n > izero ) { 00336 // Set the initial indices (ix, iy). 00337 if (incx < izero) { ix = (-n+ione)*incx; } 00338 if (incy < izero) { iy = (-n+ione)*incy; } 00339 00340 for(i = izero; i < n; i++) 00341 { 00342 y[iy] = x[ix]; 00343 ix += incx; 00344 iy += incy; 00345 } 00346 } 00347 } /* end COPY */ 00348 00349 template<typename OrdinalType, typename ScalarType> 00350 template <typename alpha_type, typename x_type> 00351 void DefaultBLASImpl<OrdinalType, ScalarType>::AXPY(const OrdinalType n, const alpha_type alpha, const x_type* x, const OrdinalType incx, ScalarType* y, const OrdinalType incy) const 00352 { 00353 OrdinalType izero = OrdinalTraits<OrdinalType>::zero(); 00354 OrdinalType ione = OrdinalTraits<OrdinalType>::one(); 00355 OrdinalType i, ix = izero, iy = izero; 00356 if( n > izero && alpha != ScalarTraits<alpha_type>::zero()) 00357 { 00358 // Set the initial indices (ix, iy). 00359 if (incx < izero) { ix = (-n+ione)*incx; } 00360 if (incy < izero) { iy = (-n+ione)*incy; } 00361 00362 for(i = izero; i < n; i++) 00363 { 00364 y[iy] += alpha * x[ix]; 00365 ix += incx; 00366 iy += incy; 00367 } 00368 } 00369 } /* end AXPY */ 00370 00371 template<typename OrdinalType, typename ScalarType> 00372 typename ScalarTraits<ScalarType>::magnitudeType DefaultBLASImpl<OrdinalType, ScalarType>::ASUM(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const 00373 { 00374 OrdinalType izero = OrdinalTraits<OrdinalType>::zero(); 00375 OrdinalType ione = OrdinalTraits<OrdinalType>::one(); 00376 typename ScalarTraits<ScalarType>::magnitudeType result = 00377 ScalarTraits<typename ScalarTraits<ScalarType>::magnitudeType>::zero(); 00378 OrdinalType i, ix = izero; 00379 if( n > izero ) { 00380 // Set the initial indices 00381 if (incx < izero) { ix = (-n+ione)*incx; } 00382 00383 for(i = izero; i < n; i++) 00384 { 00385 result += ScalarTraits<ScalarType>::magnitude(x[ix]); 00386 ix += incx; 00387 } 00388 } 00389 return result; 00390 } /* end ASUM */ 00391 00392 template<typename OrdinalType, typename ScalarType> 00393 template <typename x_type, typename y_type> 00394 ScalarType DefaultBLASImpl<OrdinalType, ScalarType>::DOT(const OrdinalType n, const x_type* x, const OrdinalType incx, const y_type* y, const OrdinalType incy) const 00395 { 00396 OrdinalType izero = OrdinalTraits<OrdinalType>::zero(); 00397 OrdinalType ione = OrdinalTraits<OrdinalType>::one(); 00398 ScalarType result = ScalarTraits<ScalarType>::zero(); 00399 OrdinalType i, ix = izero, iy = izero; 00400 if( n > izero ) 00401 { 00402 // Set the initial indices (ix,iy). 00403 if (incx < izero) { ix = (-n+ione)*incx; } 00404 if (incy < izero) { iy = (-n+ione)*incy; } 00405 00406 for(i = izero; i < n; i++) 00407 { 00408 result += ScalarTraits<x_type>::conjugate(x[ix]) * y[iy]; 00409 ix += incx; 00410 iy += incy; 00411 } 00412 } 00413 return result; 00414 } /* end DOT */ 00415 00416 template<typename OrdinalType, typename ScalarType> 00417 typename ScalarTraits<ScalarType>::magnitudeType DefaultBLASImpl<OrdinalType, ScalarType>::NRM2(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const 00418 { 00419 OrdinalType izero = OrdinalTraits<OrdinalType>::zero(); 00420 OrdinalType ione = OrdinalTraits<OrdinalType>::one(); 00421 typename ScalarTraits<ScalarType>::magnitudeType result = 00422 ScalarTraits<typename ScalarTraits<ScalarType>::magnitudeType>::zero(); 00423 OrdinalType i, ix = izero; 00424 if ( n > izero ) 00425 { 00426 // Set the initial index. 00427 if (incx < izero) { ix = (-n+ione)*incx; } 00428 00429 for(i = izero; i < n; i++) 00430 { 00431 result += ScalarTraits<ScalarType>::magnitude(ScalarTraits<ScalarType>::conjugate(x[ix]) * x[ix]); 00432 ix += incx; 00433 } 00434 result = ScalarTraits<typename ScalarTraits<ScalarType>::magnitudeType>::squareroot(result); 00435 } 00436 return result; 00437 } /* end NRM2 */ 00438 00439 template<typename OrdinalType, typename ScalarType> 00440 OrdinalType DefaultBLASImpl<OrdinalType, ScalarType>::IAMAX(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const 00441 { 00442 OrdinalType izero = OrdinalTraits<OrdinalType>::zero(); 00443 OrdinalType ione = OrdinalTraits<OrdinalType>::one(); 00444 OrdinalType result = izero, ix = izero, i; 00445 typename ScalarTraits<ScalarType>::magnitudeType maxval = 00446 ScalarTraits<typename ScalarTraits<ScalarType>::magnitudeType>::zero(); 00447 00448 if ( n > izero ) 00449 { 00450 if (incx < izero) { ix = (-n+ione)*incx; } 00451 maxval = ScalarTraits<ScalarType>::magnitude(x[ix]); 00452 ix += incx; 00453 for(i = ione; i < n; i++) 00454 { 00455 if(ScalarTraits<ScalarType>::magnitude(x[ix]) > maxval) 00456 { 00457 result = i; 00458 maxval = ScalarTraits<ScalarType>::magnitude(x[ix]); 00459 } 00460 ix += incx; 00461 } 00462 } 00463 return result + 1; // the BLAS I?AMAX functions return 1-indexed (Fortran-style) values 00464 } /* end IAMAX */ 00465 00466 //------------------------------------------------------------------------------------------ 00467 // LEVEL 2 BLAS ROUTINES 00468 //------------------------------------------------------------------------------------------ 00469 template<typename OrdinalType, typename ScalarType> 00470 template <typename alpha_type, typename A_type, typename x_type, typename beta_type> 00471 void DefaultBLASImpl<OrdinalType, ScalarType>::GEMV(ETransp trans, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, const OrdinalType lda, const x_type* x, const OrdinalType incx, const beta_type beta, ScalarType* y, const OrdinalType incy) const 00472 { 00473 OrdinalType izero = OrdinalTraits<OrdinalType>::zero(); 00474 OrdinalType ione = OrdinalTraits<OrdinalType>::one(); 00475 alpha_type alpha_zero = ScalarTraits<alpha_type>::zero(); 00476 beta_type beta_zero = ScalarTraits<beta_type>::zero(); 00477 x_type x_zero = ScalarTraits<x_type>::zero(); 00478 ScalarType y_zero = ScalarTraits<ScalarType>::zero(); 00479 beta_type beta_one = ScalarTraits<beta_type>::one(); 00480 bool BadArgument = false; 00481 00482 TEST_FOR_EXCEPTION(Teuchos::ScalarTraits<ScalarType>::isComplex && trans == CONJ_TRANS, std::logic_error, 00483 "Teuchos::BLAS::GEMV() does not currently support CONJ_TRANS for complex data types."); 00484 00485 // Quick return if there is nothing to do! 00486 if( m == izero || n == izero || ( alpha == alpha_zero && beta == beta_one ) ){ return; } 00487 00488 // Otherwise, we need to check the argument list. 00489 if( m < izero ) { 00490 std::cout << "BLAS::GEMV Error: M == " << m << std::endl; 00491 BadArgument = true; 00492 } 00493 if( n < izero ) { 00494 std::cout << "BLAS::GEMV Error: N == " << n << std::endl; 00495 BadArgument = true; 00496 } 00497 if( lda < m ) { 00498 std::cout << "BLAS::GEMV Error: LDA < MAX(1,M)"<< std::endl; 00499 BadArgument = true; 00500 } 00501 if( incx == izero ) { 00502 std::cout << "BLAS::GEMV Error: INCX == 0"<< std::endl; 00503 BadArgument = true; 00504 } 00505 if( incy == izero ) { 00506 std::cout << "BLAS::GEMV Error: INCY == 0"<< std::endl; 00507 BadArgument = true; 00508 } 00509 00510 if(!BadArgument) { 00511 OrdinalType i, j, lenx, leny, ix, iy, jx, jy; 00512 OrdinalType kx = izero, ky = izero; 00513 ScalarType temp; 00514 00515 // Determine the lengths of the vectors x and y. 00516 if(ETranspChar[trans] == 'N') { 00517 lenx = n; 00518 leny = m; 00519 } else { 00520 lenx = m; 00521 leny = n; 00522 } 00523 00524 // Set the starting pointers for the vectors x and y if incx/y < 0. 00525 if (incx < izero ) { kx = (ione - lenx)*incx; } 00526 if (incy < izero ) { ky = (ione - leny)*incy; } 00527 00528 // Form y = beta*y 00529 ix = kx; iy = ky; 00530 if(beta != beta_one) { 00531 if (incy == ione) { 00532 if (beta == beta_zero) { 00533 for(i = izero; i < leny; i++) { y[i] = y_zero; } 00534 } else { 00535 for(i = izero; i < leny; i++) { y[i] *= beta; } 00536 } 00537 } else { 00538 if (beta == beta_zero) { 00539 for(i = izero; i < leny; i++) { 00540 y[iy] = y_zero; 00541 iy += incy; 00542 } 00543 } else { 00544 for(i = izero; i < leny; i++) { 00545 y[iy] *= beta; 00546 iy += incy; 00547 } 00548 } 00549 } 00550 } 00551 00552 // Return if we don't have to do anything more. 00553 if(alpha == alpha_zero) { return; } 00554 00555 if( ETranspChar[trans] == 'N' ) { 00556 // Form y = alpha*A*y 00557 jx = kx; 00558 if (incy == ione) { 00559 for(j = izero; j < n; j++) { 00560 if (x[jx] != x_zero) { 00561 temp = alpha*x[jx]; 00562 for(i = izero; i < m; i++) { 00563 y[i] += temp*A[j*lda + i]; 00564 } 00565 } 00566 jx += incx; 00567 } 00568 } else { 00569 for(j = izero; j < n; j++) { 00570 if (x[jx] != x_zero) { 00571 temp = alpha*x[jx]; 00572 iy = ky; 00573 for(i = izero; i < m; i++) { 00574 y[iy] += temp*A[j*lda + i]; 00575 iy += incy; 00576 } 00577 } 00578 jx += incx; 00579 } 00580 } 00581 } else { 00582 jy = ky; 00583 if (incx == ione) { 00584 for(j = izero; j < n; j++) { 00585 temp = y_zero; 00586 for(i = izero; i < m; i++) { 00587 temp += A[j*lda + i]*x[i]; 00588 } 00589 y[jy] += alpha*temp; 00590 jy += incy; 00591 } 00592 } else { 00593 for(j = izero; j < n; j++) { 00594 temp = y_zero; 00595 ix = kx; 00596 for (i = izero; i < m; i++) { 00597 temp += A[j*lda + i]*x[ix]; 00598 ix += incx; 00599 } 00600 y[jy] += alpha*temp; 00601 jy += incy; 00602 } 00603 } 00604 } 00605 } /* if (!BadArgument) */ 00606 } /* end GEMV */ 00607 00608 template<typename OrdinalType, typename ScalarType> 00609 template <typename A_type> 00610 void DefaultBLASImpl<OrdinalType, ScalarType>::TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType n, const A_type* A, const OrdinalType lda, ScalarType* x, const OrdinalType incx) const 00611 { 00612 OrdinalType izero = OrdinalTraits<OrdinalType>::zero(); 00613 OrdinalType ione = OrdinalTraits<OrdinalType>::one(); 00614 ScalarType zero = ScalarTraits<ScalarType>::zero(); 00615 bool BadArgument = false; 00616 00617 TEST_FOR_EXCEPTION(Teuchos::ScalarTraits<ScalarType>::isComplex && trans == CONJ_TRANS, std::logic_error, 00618 "Teuchos::BLAS::TRMV() does not currently support CONJ_TRANS for complex data types."); 00619 00620 // Quick return if there is nothing to do! 00621 if( n == izero ){ return; } 00622 00623 // Otherwise, we need to check the argument list. 00624 if( n < izero ) { 00625 std::cout << "BLAS::TRMV Error: N == " << n << std::endl; 00626 BadArgument = true; 00627 } 00628 if( lda < n ) { 00629 std::cout << "BLAS::TRMV Error: LDA < MAX(1,N)"<< std::endl; 00630 BadArgument = true; 00631 } 00632 if( incx == izero ) { 00633 std::cout << "BLAS::TRMV Error: INCX == 0"<< std::endl; 00634 BadArgument = true; 00635 } 00636 00637 if(!BadArgument) { 00638 OrdinalType i, j, ix, jx, kx = izero; 00639 ScalarType temp; 00640 bool NoUnit = (EDiagChar[diag] == 'N'); 00641 00642 // Set the starting pointer for the std::vector x if incx < 0. 00643 if (incx < izero) { kx = (-n+ione)*incx; } 00644 00645 // Start the operations for a nontransposed triangular matrix 00646 if (ETranspChar[trans] == 'N') { 00647 /* Compute x = A*x */ 00648 if (EUploChar[uplo] == 'U') { 00649 /* A is an upper triangular matrix */ 00650 if (incx == ione) { 00651 for (j=izero; j<n; j++) { 00652 if (x[j] != zero) { 00653 temp = x[j]; 00654 for (i=izero; i<j; i++) { 00655 x[i] += temp*A[j*lda + i]; 00656 } 00657 if (NoUnit) 00658 x[j] *= A[j*lda + j]; 00659 } 00660 } 00661 } else { 00662 jx = kx; 00663 for (j=izero; j<n; j++) { 00664 if (x[jx] != zero) { 00665 temp = x[jx]; 00666 ix = kx; 00667 for (i=izero; i<j; i++) { 00668 x[ix] += temp*A[j*lda + i]; 00669 ix += incx; 00670 } 00671 if (NoUnit) 00672 x[jx] *= A[j*lda + j]; 00673 } 00674 jx += incx; 00675 } 00676 } /* if (incx == ione) */ 00677 } else { /* A is a lower triangular matrix */ 00678 if (incx == ione) { 00679 for (j=n-ione; j>-ione; j--) { 00680 if (x[j] != zero) { 00681 temp = x[j]; 00682 for (i=n-ione; i>j; i--) { 00683 x[i] += temp*A[j*lda + i]; 00684 } 00685 if (NoUnit) 00686 x[j] *= A[j*lda + j]; 00687 } 00688 } 00689 } else { 00690 kx += (n-ione)*incx; 00691 jx = kx; 00692 for (j=n-ione; j>-ione; j--) { 00693 if (x[jx] != zero) { 00694 temp = x[jx]; 00695 ix = kx; 00696 for (i=n-ione; i>j; i--) { 00697 x[ix] += temp*A[j*lda + i]; 00698 ix -= incx; 00699 } 00700 if (NoUnit) 00701 x[jx] *= A[j*lda + j]; 00702 } 00703 jx -= incx; 00704 } 00705 } 00706 } /* if (EUploChar[uplo]=='U') */ 00707 } else { /* A is transposed/conjugated */ 00708 /* Compute x = A'*x */ 00709 if (EUploChar[uplo]=='U') { 00710 /* A is an upper triangular matrix */ 00711 if (incx == ione) { 00712 for (j=n-ione; j>-ione; j--) { 00713 temp = x[j]; 00714 if (NoUnit) 00715 temp *= A[j*lda + j]; 00716 for (i=j-ione; i>-ione; i--) { 00717 temp += A[j*lda + i]*x[i]; 00718 } 00719 x[j] = temp; 00720 } 00721 } else { 00722 jx = kx + (n-ione)*incx; 00723 for (j=n-ione; j>-ione; j--) { 00724 temp = x[jx]; 00725 ix = jx; 00726 if (NoUnit) 00727 temp *= A[j*lda + j]; 00728 for (i=j-ione; i>-ione; i--) { 00729 ix -= incx; 00730 temp += A[j*lda + i]*x[ix]; 00731 } 00732 x[jx] = temp; 00733 jx -= incx; 00734 } 00735 } 00736 } else { 00737 /* A is a lower triangular matrix */ 00738 if (incx == ione) { 00739 for (j=izero; j<n; j++) { 00740 temp = x[j]; 00741 if (NoUnit) 00742 temp *= A[j*lda + j]; 00743 for (i=j+ione; i<n; i++) { 00744 temp += A[j*lda + i]*x[i]; 00745 } 00746 x[j] = temp; 00747 } 00748 } else { 00749 jx = kx; 00750 for (j=izero; j<n; j++) { 00751 temp = x[jx]; 00752 ix = jx; 00753 if (NoUnit) 00754 temp *= A[j*lda + j]; 00755 for (i=j+ione; i<n; i++) { 00756 ix += incx; 00757 temp += A[j*lda + i]*x[ix]; 00758 } 00759 x[jx] = temp; 00760 jx += incx; 00761 } 00762 } 00763 } /* if (EUploChar[uplo]=='U') */ 00764 } /* if (ETranspChar[trans]=='N') */ 00765 } /* if (!BadArgument) */ 00766 } /* end TRMV */ 00767 00768 template<typename OrdinalType, typename ScalarType> 00769 template <typename alpha_type, typename x_type, typename y_type> 00770 void DefaultBLASImpl<OrdinalType, ScalarType>::GER(const OrdinalType m, const OrdinalType n, const alpha_type alpha, const x_type* x, const OrdinalType incx, const y_type* y, const OrdinalType incy, ScalarType* A, const OrdinalType lda) const 00771 { 00772 OrdinalType izero = OrdinalTraits<OrdinalType>::zero(); 00773 OrdinalType ione = OrdinalTraits<OrdinalType>::one(); 00774 alpha_type alpha_zero = ScalarTraits<alpha_type>::zero(); 00775 y_type y_zero = ScalarTraits<y_type>::zero(); 00776 bool BadArgument = false; 00777 00778 TEST_FOR_EXCEPTION(Teuchos::ScalarTraits<ScalarType>::isComplex, std::logic_error, 00779 "Teuchos::BLAS::GER() does not currently support complex data types."); 00780 00781 // Quick return if there is nothing to do! 00782 if( m == izero || n == izero || alpha == alpha_zero ){ return; } 00783 00784 // Otherwise, we need to check the argument list. 00785 if( m < izero ) { 00786 std::cout << "BLAS::GER Error: M == " << m << std::endl; 00787 BadArgument = true; 00788 } 00789 if( n < izero ) { 00790 std::cout << "BLAS::GER Error: N == " << n << std::endl; 00791 BadArgument = true; 00792 } 00793 if( lda < m ) { 00794 std::cout << "BLAS::GER Error: LDA < MAX(1,M)"<< std::endl; 00795 BadArgument = true; 00796 } 00797 if( incx == 0 ) { 00798 std::cout << "BLAS::GER Error: INCX == 0"<< std::endl; 00799 BadArgument = true; 00800 } 00801 if( incy == 0 ) { 00802 std::cout << "BLAS::GER Error: INCY == 0"<< std::endl; 00803 BadArgument = true; 00804 } 00805 00806 if(!BadArgument) { 00807 OrdinalType i, j, ix, jy = izero, kx = izero; 00808 ScalarType temp; 00809 00810 // Set the starting pointers for the vectors x and y if incx/y < 0. 00811 if (incx < izero) { kx = (-m+ione)*incx; } 00812 if (incy < izero) { jy = (-n+ione)*incy; } 00813 00814 // Start the operations for incx == 1 00815 if( incx == ione ) { 00816 for( j=izero; j<n; j++ ) { 00817 if ( y[jy] != y_zero ) { 00818 temp = alpha*y[jy]; 00819 for ( i=izero; i<m; i++ ) { 00820 A[j*lda + i] += x[i]*temp; 00821 } 00822 } 00823 jy += incy; 00824 } 00825 } 00826 else { // Start the operations for incx != 1 00827 for( j=izero; j<n; j++ ) { 00828 if ( y[jy] != y_zero ) { 00829 temp = alpha*y[jy]; 00830 ix = kx; 00831 for( i=izero; i<m; i++ ) { 00832 A[j*lda + i] += x[ix]*temp; 00833 ix += incx; 00834 } 00835 } 00836 jy += incy; 00837 } 00838 } 00839 } /* if(!BadArgument) */ 00840 } /* end GER */ 00841 00842 //------------------------------------------------------------------------------------------ 00843 // LEVEL 3 BLAS ROUTINES 00844 //------------------------------------------------------------------------------------------ 00845 00846 template<typename OrdinalType, typename ScalarType> 00847 template <typename alpha_type, typename A_type, typename B_type, typename beta_type> 00848 void DefaultBLASImpl<OrdinalType, ScalarType>::GEMM(ETransp transa, ETransp transb, const OrdinalType m, const OrdinalType n, const OrdinalType k, const alpha_type alpha, const A_type* A, const OrdinalType lda, const B_type* B, const OrdinalType ldb, const beta_type beta, ScalarType* C, const OrdinalType ldc) const 00849 { 00850 00851 typedef TypeNameTraits<OrdinalType> OTNT; 00852 typedef TypeNameTraits<ScalarType> STNT; 00853 00854 OrdinalType izero = OrdinalTraits<OrdinalType>::zero(); 00855 alpha_type alpha_zero = ScalarTraits<alpha_type>::zero(); 00856 beta_type beta_zero = ScalarTraits<beta_type>::zero(); 00857 B_type B_zero = ScalarTraits<B_type>::zero(); 00858 ScalarType C_zero = ScalarTraits<ScalarType>::zero(); 00859 beta_type beta_one = ScalarTraits<beta_type>::one(); 00860 OrdinalType i, j, p; 00861 OrdinalType NRowA = m, NRowB = k; 00862 ScalarType temp; 00863 bool BadArgument = false; 00864 00865 TEST_FOR_EXCEPTION( 00866 Teuchos::ScalarTraits<ScalarType>::isComplex 00867 && (transa == CONJ_TRANS || transb == CONJ_TRANS), 00868 std::logic_error, 00869 "Teuchos::BLAS<"<<OTNT::name()<<","<<STNT::name()<<">::GEMM()" 00870 " does not currently support CONJ_TRANS for complex data types." 00871 ); 00872 00873 // Change dimensions of matrix if either matrix is transposed 00874 if( !(ETranspChar[transa]=='N') ) { 00875 NRowA = k; 00876 } 00877 if( !(ETranspChar[transb]=='N') ) { 00878 NRowB = n; 00879 } 00880 00881 // Quick return if there is nothing to do! 00882 if( (m==izero) || (n==izero) || (((alpha==alpha_zero)||(k==izero)) && (beta==beta_one)) ){ return; } 00883 if( m < izero ) { 00884 std::cout << "BLAS::GEMM Error: M == " << m << std::endl; 00885 BadArgument = true; 00886 } 00887 if( n < izero ) { 00888 std::cout << "BLAS::GEMM Error: N == " << n << std::endl; 00889 BadArgument = true; 00890 } 00891 if( k < izero ) { 00892 std::cout << "BLAS::GEMM Error: K == " << k << std::endl; 00893 BadArgument = true; 00894 } 00895 if( lda < NRowA ) { 00896 std::cout << "BLAS::GEMM Error: LDA < MAX(1,M)"<< std::endl; 00897 BadArgument = true; 00898 } 00899 if( ldb < NRowB ) { 00900 std::cout << "BLAS::GEMM Error: LDB < MAX(1,K)"<< std::endl; 00901 BadArgument = true; 00902 } 00903 if( ldc < m ) { 00904 std::cout << "BLAS::GEMM Error: LDC < MAX(1,M)"<< std::endl; 00905 BadArgument = true; 00906 } 00907 00908 if(!BadArgument) { 00909 00910 // Only need to scale the resulting matrix C. 00911 if( alpha == alpha_zero ) { 00912 if( beta == beta_zero ) { 00913 for (j=izero; j<n; j++) { 00914 for (i=izero; i<m; i++) { 00915 C[j*ldc + i] = C_zero; 00916 } 00917 } 00918 } else { 00919 for (j=izero; j<n; j++) { 00920 for (i=izero; i<m; i++) { 00921 C[j*ldc + i] *= beta; 00922 } 00923 } 00924 } 00925 return; 00926 } 00927 // 00928 // Now start the operations. 00929 // 00930 if ( ETranspChar[transb]=='N' ) { 00931 if ( ETranspChar[transa]=='N' ) { 00932 // Compute C = alpha*A*B + beta*C 00933 for (j=izero; j<n; j++) { 00934 if( beta == beta_zero ) { 00935 for (i=izero; i<m; i++){ 00936 C[j*ldc + i] = C_zero; 00937 } 00938 } else if( beta != beta_one ) { 00939 for (i=izero; i<m; i++){ 00940 C[j*ldc + i] *= beta; 00941 } 00942 } 00943 for (p=izero; p<k; p++){ 00944 if (B[j*ldb + p] != B_zero ){ 00945 temp = alpha*B[j*ldb + p]; 00946 for (i=izero; i<m; i++) { 00947 C[j*ldc + i] += temp*A[p*lda + i]; 00948 } 00949 } 00950 } 00951 } 00952 } else { 00953 // Compute C = alpha*A'*B + beta*C 00954 for (j=izero; j<n; j++) { 00955 for (i=izero; i<m; i++) { 00956 temp = C_zero; 00957 for (p=izero; p<k; p++) { 00958 temp += A[i*lda + p]*B[j*ldb + p]; 00959 } 00960 if (beta == beta_zero) { 00961 C[j*ldc + i] = alpha*temp; 00962 } else { 00963 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i]; 00964 } 00965 } 00966 } 00967 } 00968 } else { 00969 if ( ETranspChar[transa]=='N' ) { 00970 // Compute C = alpha*A*B' + beta*C 00971 for (j=izero; j<n; j++) { 00972 if (beta == beta_zero) { 00973 for (i=izero; i<m; i++) { 00974 C[j*ldc + i] = C_zero; 00975 } 00976 } else if ( beta != beta_one ) { 00977 for (i=izero; i<m; i++) { 00978 C[j*ldc + i] *= beta; 00979 } 00980 } 00981 for (p=izero; p<k; p++) { 00982 if (B[p*ldb + j] != B_zero) { 00983 temp = alpha*B[p*ldb + j]; 00984 for (i=izero; i<m; i++) { 00985 C[j*ldc + i] += temp*A[p*lda + i]; 00986 } 00987 } 00988 } 00989 } 00990 } else { 00991 // Compute C += alpha*A'*B' + beta*C 00992 for (j=izero; j<n; j++) { 00993 for (i=izero; i<m; i++) { 00994 temp = C_zero; 00995 for (p=izero; p<k; p++) { 00996 temp += A[i*lda + p]*B[p*ldb + j]; 00997 } 00998 if (beta == beta_zero) { 00999 C[j*ldc + i] = alpha*temp; 01000 } else { 01001 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i]; 01002 } 01003 } 01004 } 01005 } // end if (ETranspChar[transa]=='N') ... 01006 } // end if (ETranspChar[transb]=='N') ... 01007 } // end if (!BadArgument) ... 01008 } // end of GEMM 01009 01010 01011 template<typename OrdinalType, typename ScalarType> 01012 template <typename alpha_type, typename A_type, typename B_type, typename beta_type> 01013 void DefaultBLASImpl<OrdinalType, ScalarType>::SYMM(ESide side, EUplo uplo, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, const OrdinalType lda, const B_type* B, const OrdinalType ldb, const beta_type beta, ScalarType* C, const OrdinalType ldc) const 01014 { 01015 OrdinalType izero = OrdinalTraits<OrdinalType>::zero(); 01016 OrdinalType ione = OrdinalTraits<OrdinalType>::one(); 01017 alpha_type alpha_zero = ScalarTraits<alpha_type>::zero(); 01018 beta_type beta_zero = ScalarTraits<beta_type>::zero(); 01019 ScalarType C_zero = ScalarTraits<ScalarType>::zero(); 01020 beta_type beta_one = ScalarTraits<beta_type>::one(); 01021 OrdinalType i, j, k, NRowA = m; 01022 ScalarType temp1, temp2; 01023 bool BadArgument = false; 01024 bool Upper = (EUploChar[uplo] == 'U'); 01025 if (ESideChar[side] == 'R') { NRowA = n; } 01026 01027 TEST_FOR_EXCEPTION(Teuchos::ScalarTraits<ScalarType>::isComplex, std::logic_error, 01028 "Teuchos::BLAS::SYMM() does not currently support complex data types."); 01029 01030 // Quick return. 01031 if ( (m==izero) || (n==izero) || ( (alpha==alpha_zero)&&(beta==beta_one) ) ) { return; } 01032 if( m < 0 ) { 01033 std::cout << "BLAS::SYMM Error: M == "<< m << std::endl; 01034 BadArgument = true; } 01035 if( n < 0 ) { 01036 std::cout << "BLAS::SYMM Error: N == "<< n << std::endl; 01037 BadArgument = true; } 01038 if( lda < NRowA ) { 01039 std::cout << "BLAS::SYMM Error: LDA == "<<lda<<std::endl; 01040 BadArgument = true; } 01041 if( ldb < m ) { 01042 std::cout << "BLAS::SYMM Error: LDB == "<<ldb<<std::endl; 01043 BadArgument = true; } 01044 if( ldc < m ) { 01045 std::cout << "BLAS::SYMM Error: LDC == "<<ldc<<std::endl; 01046 BadArgument = true; } 01047 01048 if(!BadArgument) { 01049 01050 // Only need to scale C and return. 01051 if (alpha == alpha_zero) { 01052 if (beta == beta_zero ) { 01053 for (j=izero; j<n; j++) { 01054 for (i=izero; i<m; i++) { 01055 C[j*ldc + i] = C_zero; 01056 } 01057 } 01058 } else { 01059 for (j=izero; j<n; j++) { 01060 for (i=izero; i<m; i++) { 01061 C[j*ldc + i] *= beta; 01062 } 01063 } 01064 } 01065 return; 01066 } 01067 01068 if ( ESideChar[side] == 'L') { 01069 // Compute C = alpha*A*B + beta*C 01070 01071 if (Upper) { 01072 // The symmetric part of A is stored in the upper triangular part of the matrix. 01073 for (j=izero; j<n; j++) { 01074 for (i=izero; i<m; i++) { 01075 temp1 = alpha*B[j*ldb + i]; 01076 temp2 = C_zero; 01077 for (k=izero; k<i; k++) { 01078 C[j*ldc + k] += temp1*A[i*lda + k]; 01079 temp2 += B[j*ldb + k]*A[i*lda + k]; 01080 } 01081 if (beta == beta_zero) { 01082 C[j*ldc + i] = temp1*A[i*lda + i] + alpha*temp2; 01083 } else { 01084 C[j*ldc + i] = beta*C[j*ldc + i] + temp1*A[i*lda + i] + alpha*temp2; 01085 } 01086 } 01087 } 01088 } else { 01089 // The symmetric part of A is stored in the lower triangular part of the matrix. 01090 for (j=izero; j<n; j++) { 01091 for (i=m-ione; i>-ione; i--) { 01092 temp1 = alpha*B[j*ldb + i]; 01093 temp2 = C_zero; 01094 for (k=i+ione; k<m; k++) { 01095 C[j*ldc + k] += temp1*A[i*lda + k]; 01096 temp2 += B[j*ldb + k]*A[i*lda + k]; 01097 } 01098 if (beta == beta_zero) { 01099 C[j*ldc + i] = temp1*A[i*lda + i] + alpha*temp2; 01100 } else { 01101 C[j*ldc + i] = beta*C[j*ldc + i] + temp1*A[i*lda + i] + alpha*temp2; 01102 } 01103 } 01104 } 01105 } 01106 } else { 01107 // Compute C = alpha*B*A + beta*C. 01108 for (j=izero; j<n; j++) { 01109 temp1 = alpha*A[j*lda + j]; 01110 if (beta == beta_zero) { 01111 for (i=izero; i<m; i++) { 01112 C[j*ldc + i] = temp1*B[j*ldb + i]; 01113 } 01114 } else { 01115 for (i=izero; i<m; i++) { 01116 C[j*ldc + i] = beta*C[j*ldc + i] + temp1*B[j*ldb + i]; 01117 } 01118 } 01119 for (k=izero; k<j; k++) { 01120 if (Upper) { 01121 temp1 = alpha*A[j*lda + k]; 01122 } else { 01123 temp1 = alpha*A[k*lda + j]; 01124 } 01125 for (i=izero; i<m; i++) { 01126 C[j*ldc + i] += temp1*B[k*ldb + i]; 01127 } 01128 } 01129 for (k=j+ione; k<n; k++) { 01130 if (Upper) { 01131 temp1 = alpha*A[k*lda + j]; 01132 } else { 01133 temp1 = alpha*A[j*lda + k]; 01134 } 01135 for (i=izero; i<m; i++) { 01136 C[j*ldc + i] += temp1*B[k*ldb + i]; 01137 } 01138 } 01139 } 01140 } // end if (ESideChar[side]=='L') ... 01141 } // end if(!BadArgument) ... 01142 } // end SYMM 01143 01144 template<typename OrdinalType, typename ScalarType> 01145 template <typename alpha_type, typename A_type> 01146 void DefaultBLASImpl<OrdinalType, ScalarType>::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const 01147 { 01148 OrdinalType izero = OrdinalTraits<OrdinalType>::zero(); 01149 OrdinalType ione = OrdinalTraits<OrdinalType>::one(); 01150 alpha_type alpha_zero = ScalarTraits<alpha_type>::zero(); 01151 A_type A_zero = ScalarTraits<A_type>::zero(); 01152 ScalarType B_zero = ScalarTraits<ScalarType>::zero(); 01153 ScalarType one = ScalarTraits<ScalarType>::one(); 01154 OrdinalType i, j, k, NRowA = m; 01155 ScalarType temp; 01156 bool BadArgument = false; 01157 bool LSide = (ESideChar[side] == 'L'); 01158 bool NoUnit = (EDiagChar[diag] == 'N'); 01159 bool Upper = (EUploChar[uplo] == 'U'); 01160 01161 TEST_FOR_EXCEPTION(Teuchos::ScalarTraits<ScalarType>::isComplex && transa == CONJ_TRANS, std::logic_error, 01162 "Teuchos::BLAS::TRMM() does not currently support CONJ_TRANS for complex data types."); 01163 01164 if(!LSide) { NRowA = n; } 01165 01166 // Quick return. 01167 if (n==izero || m==izero) { return; } 01168 if( m < 0 ) { 01169 std::cout << "BLAS::TRMM Error: M == "<< m <<std::endl; 01170 BadArgument = true; } 01171 if( n < 0 ) { 01172 std::cout << "BLAS::TRMM Error: N == "<< n <<std::endl; 01173 BadArgument = true; } 01174 if( lda < NRowA ) { 01175 std::cout << "BLAS::TRMM Error: LDA == "<< lda << std::endl; 01176 BadArgument = true; } 01177 if( ldb < m ) { 01178 std::cout << "BLAS::TRMM Error: M == "<< ldb << std::endl; 01179 BadArgument = true; } 01180 01181 if(!BadArgument) { 01182 01183 // B only needs to be zeroed out. 01184 if( alpha == alpha_zero ) { 01185 for( j=izero; j<n; j++ ) { 01186 for( i=izero; i<m; i++ ) { 01187 B[j*ldb + i] = B_zero; 01188 } 01189 } 01190 return; 01191 } 01192 01193 // Start the computations. 01194 if ( LSide ) { 01195 // A is on the left side of B. 01196 01197 if ( ETranspChar[transa]=='N' ) { 01198 // Compute B = alpha*A*B 01199 01200 if ( Upper ) { 01201 // A is upper triangular 01202 for( j=izero; j<n; j++ ) { 01203 for( k=izero; k<m; k++) { 01204 if ( B[j*ldb + k] != B_zero ) { 01205 temp = alpha*B[j*ldb + k]; 01206 for( i=izero; i<k; i++ ) { 01207 B[j*ldb + i] += temp*A[k*lda + i]; 01208 } 01209 if ( NoUnit ) 01210 temp *=A[k*lda + k]; 01211 B[j*ldb + k] = temp; 01212 } 01213 } 01214 } 01215 } else { 01216 // A is lower triangular 01217 for( j=izero; j<n; j++ ) { 01218 for( k=m-ione; k>-ione; k-- ) { 01219 if( B[j*ldb + k] != B_zero ) { 01220 temp = alpha*B[j*ldb + k]; 01221 B[j*ldb + k] = temp; 01222 if ( NoUnit ) 01223 B[j*ldb + k] *= A[k*lda + k]; 01224 for( i=k+ione; i<m; i++ ) { 01225 B[j*ldb + i] += temp*A[k*lda + i]; 01226 } 01227 } 01228 } 01229 } 01230 } 01231 } else { 01232 // Compute B = alpha*A'*B 01233 if( Upper ) { 01234 for( j=izero; j<n; j++ ) { 01235 for( i=m-ione; i>-ione; i-- ) { 01236 temp = B[j*ldb + i]; 01237 if( NoUnit ) 01238 temp *= A[i*lda + i]; 01239 for( k=izero; k<i; k++ ) { 01240 temp += A[i*lda + k]*B[j*ldb + k]; 01241 } 01242 B[j*ldb + i] = alpha*temp; 01243 } 01244 } 01245 } else { 01246 for( j=izero; j<n; j++ ) { 01247 for( i=izero; i<m; i++ ) { 01248 temp = B[j*ldb + i]; 01249 if( NoUnit ) 01250 temp *= A[i*lda + i]; 01251 for( k=i+ione; k<m; k++ ) { 01252 temp += A[i*lda + k]*B[j*ldb + k]; 01253 } 01254 B[j*ldb + i] = alpha*temp; 01255 } 01256 } 01257 } 01258 } 01259 } else { 01260 // A is on the right hand side of B. 01261 01262 if( ETranspChar[transa] == 'N' ) { 01263 // Compute B = alpha*B*A 01264 01265 if( Upper ) { 01266 // A is upper triangular. 01267 for( j=n-ione; j>-ione; j-- ) { 01268 temp = alpha; 01269 if( NoUnit ) 01270 temp *= A[j*lda + j]; 01271 for( i=izero; i<m; i++ ) { 01272 B[j*ldb + i] *= temp; 01273 } 01274 for( k=izero; k<j; k++ ) { 01275 if( A[j*lda + k] != A_zero ) { 01276 temp = alpha*A[j*lda + k]; 01277 for( i=izero; i<m; i++ ) { 01278 B[j*ldb + i] += temp*B[k*ldb + i]; 01279 } 01280 } 01281 } 01282 } 01283 } else { 01284 // A is lower triangular. 01285 for( j=izero; j<n; j++ ) { 01286 temp = alpha; 01287 if( NoUnit ) 01288 temp *= A[j*lda + j]; 01289 for( i=izero; i<m; i++ ) { 01290 B[j*ldb + i] *= temp; 01291 } 01292 for( k=j+ione; k<n; k++ ) { 01293 if( A[j*lda + k] != A_zero ) { 01294 temp = alpha*A[j*lda + k]; 01295 for( i=izero; i<m; i++ ) { 01296 B[j*ldb + i] += temp*B[k*ldb + i]; 01297 } 01298 } 01299 } 01300 } 01301 } 01302 } else { 01303 // Compute B = alpha*B*A' 01304 01305 if( Upper ) { 01306 for( k=izero; k<n; k++ ) { 01307 for( j=izero; j<k; j++ ) { 01308 if( A[k*lda + j] != A_zero ) { 01309 temp = alpha*A[k*lda + j]; 01310 for( i=izero; i<m; i++ ) { 01311 B[j*ldb + i] += temp*B[k*ldb + i]; 01312 } 01313 } 01314 } 01315 temp = alpha; 01316 if( NoUnit ) 01317 temp *= A[k*lda + k]; 01318 if( temp != one ) { 01319 for( i=izero; i<m; i++ ) { 01320 B[k*ldb + i] *= temp; 01321 } 01322 } 01323 } 01324 } else { 01325 for( k=n-ione; k>-ione; k-- ) { 01326 for( j=k+ione; j<n; j++ ) { 01327 if( A[k*lda + j] != A_zero ) { 01328 temp = alpha*A[k*lda + j]; 01329 for( i=izero; i<m; i++ ) { 01330 B[j*ldb + i] += temp*B[k*ldb + i]; 01331 } 01332 } 01333 } 01334 temp = alpha; 01335 if( NoUnit ) 01336 temp *= A[k*lda + k]; 01337 if( temp != one ) { 01338 for( i=izero; i<m; i++ ) { 01339 B[k*ldb + i] *= temp; 01340 } 01341 } 01342 } 01343 } 01344 } // end if( ETranspChar[transa] == 'N' ) ... 01345 } // end if ( LSide ) ... 01346 } // end if (!BadArgument) 01347 } // end TRMM 01348 01349 template<typename OrdinalType, typename ScalarType> 01350 template <typename alpha_type, typename A_type> 01351 void DefaultBLASImpl<OrdinalType, ScalarType>::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const 01352 { 01353 OrdinalType izero = OrdinalTraits<OrdinalType>::zero(); 01354 OrdinalType ione = OrdinalTraits<OrdinalType>::one(); 01355 alpha_type alpha_zero = ScalarTraits<alpha_type>::zero(); 01356 A_type A_zero = ScalarTraits<A_type>::zero(); 01357 ScalarType B_zero = ScalarTraits<ScalarType>::zero(); 01358 alpha_type alpha_one = ScalarTraits<alpha_type>::one(); 01359 ScalarType B_one = ScalarTraits<ScalarType>::one(); 01360 ScalarType temp; 01361 OrdinalType NRowA = m; 01362 bool BadArgument = false; 01363 bool NoUnit = (EDiagChar[diag]=='N'); 01364 01365 TEST_FOR_EXCEPTION(Teuchos::ScalarTraits<ScalarType>::isComplex && transa == CONJ_TRANS, std::logic_error, 01366 "Teuchos::BLAS::TRSM() does not currently support CONJ_TRANS for complex data types."); 01367 01368 if (!(ESideChar[side] == 'L')) { NRowA = n; } 01369 01370 // Quick return. 01371 if (n == izero || m == izero) { return; } 01372 if( m < izero ) { 01373 std::cout << "BLAS::TRSM Error: M == "<<m<<std::endl; 01374 BadArgument = true; } 01375 if( n < izero ) { 01376 std::cout << "BLAS::TRSM Error: N == "<<n<<std::endl; 01377 BadArgument = true; } 01378 if( lda < NRowA ) { 01379 std::cout << "BLAS::TRSM Error: LDA == "<<lda<<std::endl; 01380 BadArgument = true; } 01381 if( ldb < m ) { 01382 std::cout << "BLAS::TRSM Error: LDB == "<<ldb<<std::endl; 01383 BadArgument = true; } 01384 01385 if(!BadArgument) 01386 { 01387 int i, j, k; 01388 // Set the solution to the zero std::vector. 01389 if(alpha == alpha_zero) { 01390 for(j = izero; j < n; j++) { 01391 for( i = izero; i < m; i++) { 01392 B[j*ldb+i] = B_zero; 01393 } 01394 } 01395 } 01396 else 01397 { // Start the operations. 01398 if(ESideChar[side] == 'L') { 01399 // 01400 // Perform computations for OP(A)*X = alpha*B 01401 // 01402 if(ETranspChar[transa] == 'N') { 01403 // 01404 // Compute B = alpha*inv( A )*B 01405 // 01406 if(EUploChar[uplo] == 'U') { 01407 // A is upper triangular. 01408 for(j = izero; j < n; j++) { 01409 // Perform alpha*B if alpha is not 1. 01410 if(alpha != alpha_one) { 01411 for( i = izero; i < m; i++) { 01412 B[j*ldb+i] *= alpha; 01413 } 01414 } 01415 // Perform a backsolve for column j of B. 01416 for(k = (m - ione); k > -ione; k--) { 01417 // If this entry is zero, we don't have to do anything. 01418 if (B[j*ldb + k] != B_zero) { 01419 if (NoUnit) { 01420 B[j*ldb + k] /= A[k*lda + k]; 01421 } 01422 for(i = izero; i < k; i++) { 01423 B[j*ldb + i] -= B[j*ldb + k] * A[k*lda + i]; 01424 } 01425 } 01426 } 01427 } 01428 } 01429 else 01430 { // A is lower triangular. 01431 for(j = izero; j < n; j++) { 01432 // Perform alpha*B if alpha is not 1. 01433 if(alpha != alpha_one) { 01434 for( i = izero; i < m; i++) { 01435 B[j*ldb+i] *= alpha; 01436 } 01437 } 01438 // Perform a forward solve for column j of B. 01439 for(k = izero; k < m; k++) { 01440 // If this entry is zero, we don't have to do anything. 01441 if (B[j*ldb + k] != B_zero) { 01442 if (NoUnit) { 01443 B[j*ldb + k] /= A[k*lda + k]; 01444 } 01445 for(i = k+ione; i < m; i++) { 01446 B[j*ldb + i] -= B[j*ldb + k] * A[k*lda + i]; 01447 } 01448 } 01449 } 01450 } 01451 } // end if (uplo == 'U') 01452 } // if (transa =='N') 01453 else { 01454 // 01455 // Compute B = alpha*inv( A' )*B 01456 // 01457 if(EUploChar[uplo] == 'U') { 01458 // A is upper triangular. 01459 for(j = izero; j < n; j++) { 01460 for( i = izero; i < m; i++) { 01461 temp = alpha*B[j*ldb+i]; 01462 for(k = izero; k < i; k++) { 01463 temp -= A[i*lda + k] * B[j*ldb + k]; 01464 } 01465 if (NoUnit) { 01466 temp /= A[i*lda + i]; 01467 } 01468 B[j*ldb + i] = temp; 01469 } 01470 } 01471 } 01472 else 01473 { // A is lower triangular. 01474 for(j = izero; j < n; j++) { 01475 for(i = (m - ione) ; i > -ione; i--) { 01476 temp = alpha*B[j*ldb+i]; 01477 for(k = i+ione; k < m; k++) { 01478 temp -= A[i*lda + k] * B[j*ldb + k]; 01479 } 01480 if (NoUnit) { 01481 temp /= A[i*lda + i]; 01482 } 01483 B[j*ldb + i] = temp; 01484 } 01485 } 01486 } 01487 } 01488 } // if (side == 'L') 01489 else { 01490 // side == 'R' 01491 // 01492 // Perform computations for X*OP(A) = alpha*B 01493 // 01494 if (ETranspChar[transa] == 'N') { 01495 // 01496 // Compute B = alpha*B*inv( A ) 01497 // 01498 if(EUploChar[uplo] == 'U') { 01499 // A is upper triangular. 01500 // Perform a backsolve for column j of B. 01501 for(j = izero; j < n; j++) { 01502 // Perform alpha*B if alpha is not 1. 01503 if(alpha != alpha_one) { 01504 for( i = izero; i < m; i++) { 01505 B[j*ldb+i] *= alpha; 01506 } 01507 } 01508 for(k = izero; k < j; k++) { 01509 // If this entry is zero, we don't have to do anything. 01510 if (A[j*lda + k] != A_zero) { 01511 for(i = izero; i < m; i++) { 01512 B[j*ldb + i] -= A[j*lda + k] * B[k*ldb + i]; 01513 } 01514 } 01515 } 01516 if (NoUnit) { 01517 temp = B_one/A[j*lda + j]; 01518 for(i = izero; i < m; i++) { 01519 B[j*ldb + i] *= temp; 01520 } 01521 } 01522 } 01523 } 01524 else 01525 { // A is lower triangular. 01526 for(j = (n - ione); j > -ione; j--) { 01527 // Perform alpha*B if alpha is not 1. 01528 if(alpha != alpha_one) { 01529 for( i = izero; i < m; i++) { 01530 B[j*ldb+i] *= alpha; 01531 } 01532 } 01533 // Perform a forward solve for column j of B. 01534 for(k = j+ione; k < n; k++) { 01535 // If this entry is zero, we don't have to do anything. 01536 if (A[j*lda + k] != A_zero) { 01537 for(i = izero; i < m; i++) { 01538 B[j*ldb + i] -= A[j*lda + k] * B[k*ldb + i]; 01539 } 01540 } 01541 } 01542 if (NoUnit) { 01543 temp = B_one/A[j*lda + j]; 01544 for(i = izero; i < m; i++) { 01545 B[j*ldb + i] *= temp; 01546 } 01547 } 01548 } 01549 } // end if (uplo == 'U') 01550 } // if (transa =='N') 01551 else { 01552 // 01553 // Compute B = alpha*B*inv( A' ) 01554 // 01555 if(EUploChar[uplo] == 'U') { 01556 // A is upper triangular. 01557 for(k = (n - ione); k > -ione; k--) { 01558 if (NoUnit) { 01559 temp = B_one/A[k*lda + k]; 01560 for(i = izero; i < m; i++) { 01561 B[k*ldb + i] *= temp; 01562 } 01563 } 01564 for(j = izero; j < k; j++) { 01565 if (A[k*lda + j] != A_zero) { 01566 temp = A[k*lda + j]; 01567 for(i = izero; i < m; i++) { 01568 B[j*ldb + i] -= temp*B[k*ldb + i]; 01569 } 01570 } 01571 } 01572 if (alpha != alpha_one) { 01573 for (i = izero; i < m; i++) { 01574 B[k*ldb + i] *= alpha; 01575 } 01576 } 01577 } 01578 } 01579 else 01580 { // A is lower triangular. 01581 for(k = izero; k < n; k++) { 01582 if (NoUnit) { 01583 temp = B_one/A[k*lda + k]; 01584 for (i = izero; i < m; i++) { 01585 B[k*ldb + i] *= temp; 01586 } 01587 } 01588 for(j = k+ione; j < n; j++) { 01589 if(A[k*lda + j] != A_zero) { 01590 temp = A[k*lda + j]; 01591 for(i = izero; i < m; i++) { 01592 B[j*ldb + i] -= temp*B[k*ldb + i]; 01593 } 01594 } 01595 } 01596 if (alpha != alpha_one) { 01597 for (i = izero; i < m; i++) { 01598 B[k*ldb + i] *= alpha; 01599 } 01600 } 01601 } 01602 } 01603 } 01604 } 01605 } 01606 } 01607 } 01608 01609 // Explicit instantiation for template<int,float> 01610 01611 template <> 01612 class TEUCHOS_LIB_DLL_EXPORT BLAS<int, float> 01613 { 01614 public: 01615 inline BLAS(void) {} 01616 inline BLAS(const BLAS<int, float>& /*BLAS_source*/) {} 01617 inline virtual ~BLAS(void) {} 01618 void ROTG(float* da, float* db, float* c, float* s) const; 01619 void ROT(const int n, float* dx, const int incx, float* dy, const int incy, float* c, float* s) const; 01620 float ASUM(const int n, const float* x, const int incx) const; 01621 void AXPY(const int n, const float alpha, const float* x, const int incx, float* y, const int incy) const; 01622 void COPY(const int n, const float* x, const int incx, float* y, const int incy) const; 01623 float DOT(const int n, const float* x, const int incx, const float* y, const int incy) const; 01624 float NRM2(const int n, const float* x, const int incx) const; 01625 void SCAL(const int n, const float alpha, float* x, const int incx) const; 01626 int IAMAX(const int n, const float* x, const int incx) const; 01627 void GEMV(ETransp trans, const int m, const int n, const float alpha, const float* A, const int lda, const float* x, const int incx, const float beta, float* y, const int incy) const; 01628 void TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const float* A, const int lda, float* x, const int incx) const; 01629 void GER(const int m, const int n, const float alpha, const float* x, const int incx, const float* y, const int incy, float* A, const int lda) const; 01630 void GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const float alpha, const float* A, const int lda, const float* B, const int ldb, const float beta, float* C, const int ldc) const; 01631 void SYMM(ESide side, EUplo uplo, const int m, const int n, const float alpha, const float* A, const int lda, const float *B, const int ldb, const float beta, float *C, const int ldc) const; 01632 void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const float alpha, const float* A, const int lda, float* B, const int ldb) const; 01633 void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const float alpha, const float* A, const int lda, float* B, const int ldb) const; 01634 }; 01635 01636 // Explicit instantiation for template<int,double> 01637 01638 template<> 01639 class TEUCHOS_LIB_DLL_EXPORT BLAS<int, double> 01640 { 01641 public: 01642 inline BLAS(void) {} 01643 inline BLAS(const BLAS<int, double>& /*BLAS_source*/) {} 01644 inline virtual ~BLAS(void) {} 01645 void ROTG(double* da, double* db, double* c, double* s) const; 01646 void ROT(const int n, double* dx, const int incx, double* dy, const int incy, double* c, double* s) const; 01647 double ASUM(const int n, const double* x, const int incx) const; 01648 void AXPY(const int n, const double alpha, const double* x, const int incx, double* y, const int incy) const; 01649 void COPY(const int n, const double* x, const int incx, double* y, const int incy) const; 01650 double DOT(const int n, const double* x, const int incx, const double* y, const int incy) const; 01651 double NRM2(const int n, const double* x, const int incx) const; 01652 void SCAL(const int n, const double alpha, double* x, const int incx) const; 01653 int IAMAX(const int n, const double* x, const int incx) const; 01654 void GEMV(ETransp trans, const int m, const int n, const double alpha, const double* A, const int lda, const double* x, const int incx, const double beta, double* y, const int incy) const; 01655 void TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const double* A, const int lda, double* x, const int incx) const; 01656 void GER(const int m, const int n, const double alpha, const double* x, const int incx, const double* y, const int incy, double* A, const int lda) const; 01657 void GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const double alpha, const double* A, const int lda, const double* B, const int ldb, const double beta, double* C, const int ldc) const; 01658 void SYMM(ESide side, EUplo uplo, const int m, const int n, const double alpha, const double* A, const int lda, const double *B, const int ldb, const double beta, double *C, const int ldc) const; 01659 void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const double alpha, const double* A, const int lda, double* B, const int ldb) const; 01660 void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const double alpha, const double* A, const int lda, double* B, const int ldb) const; 01661 }; 01662 01663 // Explicit instantiation for template<int,complex<float> > 01664 01665 template<> 01666 class TEUCHOS_LIB_DLL_EXPORT BLAS<int, std::complex<float> > 01667 { 01668 public: 01669 inline BLAS(void) {} 01670 inline BLAS(const BLAS<int, std::complex<float> >& /*BLAS_source*/) {} 01671 inline virtual ~BLAS(void) {} 01672 void ROTG(std::complex<float>* da, std::complex<float>* db, float* c, std::complex<float>* s) const; 01673 void ROT(const int n, std::complex<float>* dx, const int incx, std::complex<float>* dy, const int incy, float* c, std::complex<float>* s) const; 01674 float ASUM(const int n, const std::complex<float>* x, const int incx) const; 01675 void AXPY(const int n, const std::complex<float> alpha, const std::complex<float>* x, const int incx, std::complex<float>* y, const int incy) const; 01676 void COPY(const int n, const std::complex<float>* x, const int incx, std::complex<float>* y, const int incy) const; 01677 std::complex<float> DOT(const int n, const std::complex<float>* x, const int incx, const std::complex<float>* y, const int incy) const; 01678 float NRM2(const int n, const std::complex<float>* x, const int incx) const; 01679 void SCAL(const int n, const std::complex<float> alpha, std::complex<float>* x, const int incx) const; 01680 int IAMAX(const int n, const std::complex<float>* x, const int incx) const; 01681 void GEMV(ETransp trans, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, const std::complex<float>* x, const int incx, const std::complex<float> beta, std::complex<float>* y, const int incy) const; 01682 void TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const std::complex<float>* A, const int lda, std::complex<float>* x, const int incx) const; 01683 void GER(const int m, const int n, const std::complex<float> alpha, const std::complex<float>* x, const int incx, const std::complex<float>* y, const int incy, std::complex<float>* A, const int lda) const; 01684 void GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const std::complex<float> alpha, const std::complex<float>* A, const int lda, const std::complex<float>* B, const int ldb, const std::complex<float> beta, std::complex<float>* C, const int ldc) const; 01685 void SYMM(ESide side, EUplo uplo, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, const std::complex<float> *B, const int ldb, const std::complex<float> beta, std::complex<float> *C, const int ldc) const; 01686 void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, std::complex<float>* B, const int ldb) const; 01687 void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, std::complex<float>* B, const int ldb) const; 01688 }; 01689 01690 // Explicit instantiation for template<int,complex<double> > 01691 01692 template<> 01693 class TEUCHOS_LIB_DLL_EXPORT BLAS<int, std::complex<double> > 01694 { 01695 public: 01696 inline BLAS(void) {} 01697 inline BLAS(const BLAS<int, std::complex<double> >& /*BLAS_source*/) {} 01698 inline virtual ~BLAS(void) {} 01699 void ROTG(std::complex<double>* da, std::complex<double>* db, double* c, std::complex<double>* s) const; 01700 void ROT(const int n, std::complex<double>* dx, const int incx, std::complex<double>* dy, const int incy, double* c, std::complex<double>* s) const; 01701 double ASUM(const int n, const std::complex<double>* x, const int incx) const; 01702 void AXPY(const int n, const std::complex<double> alpha, const std::complex<double>* x, const int incx, std::complex<double>* y, const int incy) const; 01703 void COPY(const int n, const std::complex<double>* x, const int incx, std::complex<double>* y, const int incy) const; 01704 std::complex<double> DOT(const int n, const std::complex<double>* x, const int incx, const std::complex<double>* y, const int incy) const; 01705 double NRM2(const int n, const std::complex<double>* x, const int incx) const; 01706 void SCAL(const int n, const std::complex<double> alpha, std::complex<double>* x, const int incx) const; 01707 int IAMAX(const int n, const std::complex<double>* x, const int incx) const; 01708 void GEMV(ETransp trans, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, const std::complex<double>* x, const int incx, const std::complex<double> beta, std::complex<double>* y, const int incy) const; 01709 void TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const std::complex<double>* A, const int lda, std::complex<double>* x, const int incx) const; 01710 void GER(const int m, const int n, const std::complex<double> alpha, const std::complex<double>* x, const int incx, const std::complex<double>* y, const int incy, std::complex<double>* A, const int lda) const; 01711 void GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const std::complex<double> alpha, const std::complex<double>* A, const int lda, const std::complex<double>* B, const int ldb, const std::complex<double> beta, std::complex<double>* C, const int ldc) const; 01712 void SYMM(ESide side, EUplo uplo, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, const std::complex<double> *B, const int ldb, const std::complex<double> beta, std::complex<double> *C, const int ldc) const; 01713 void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, std::complex<double>* B, const int ldb) const; 01714 void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, std::complex<double>* B, const int ldb) const; 01715 }; 01716 01717 } // namespace Teuchos 01718 01719 #endif // _TEUCHOS_BLAS_HPP_
1.7.4