41#ifndef TPETRA_MATRIXMATRIX_DEF_HPP
42#define TPETRA_MATRIXMATRIX_DEF_HPP
43#include "KokkosSparse_Utils.hpp"
44#include "Tpetra_ConfigDefs.hpp"
46#include "Teuchos_VerboseObject.hpp"
47#include "Teuchos_Array.hpp"
49#include "Tpetra_CrsMatrix.hpp"
50#include "Tpetra_BlockCrsMatrix.hpp"
52#include "Tpetra_RowMatrixTransposer.hpp"
55#include "Tpetra_Details_makeColMap.hpp"
56#include "Tpetra_ConfigDefs.hpp"
57#include "Tpetra_Map.hpp"
58#include "Tpetra_Export.hpp"
63#include "Teuchos_FancyOStream.hpp"
65#include "TpetraExt_MatrixMatrix_ExtraKernels_def.hpp"
68#include "KokkosSparse_spgemm.hpp"
69#include "KokkosSparse_spadd.hpp"
70#include "Kokkos_Bitset.hpp"
84#include "TpetraExt_MatrixMatrix_OpenMP.hpp"
85#include "TpetraExt_MatrixMatrix_Cuda.hpp"
86#include "TpetraExt_MatrixMatrix_HIP.hpp"
87#include "TpetraExt_MatrixMatrix_SYCL.hpp"
91namespace MatrixMatrix{
99template <
class Scalar,
110 const std::string& label,
111 const Teuchos::RCP<Teuchos::ParameterList>&
params)
126#ifdef HAVE_TPETRA_MMM_TIMINGS
127 std::string
prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
128 using Teuchos::TimeMonitor;
134 const std::string
prefix =
"TpetraExt::MatrixMatrix::Multiply(): ";
159 const bool newFlag = !
C.getGraph()->isLocallyIndexed() && !
C.getGraph()->isGloballyIndexed();
165#ifdef USE_OLD_TRANSPOSE
169 using Teuchos::ParameterList;
197 prefix <<
"ERROR, inner dimensions of op(A) and op(B) "
198 "must match for matrix-matrix product. op(A) is "
206 prefix <<
"ERROR, dimensions of result C must "
207 "match dimensions of op(A) * op(B). C has " <<
C.getGlobalNumRows()
208 <<
" rows, should have at least " <<
Aouter << std::endl);
216 if (!
C.isFillActive())
C.resumeFill();
230#ifdef HAVE_TPETRA_MMM_TIMINGS
252#ifdef HAVE_TPETRA_MMM_TIMINGS
260 MMdetails::mult_AT_B_newmatrix(
A,
B,
C, label,
params);
277#ifdef HAVE_TPETRA_MMM_TIMINGS
287 C.fillComplete(
Bprime->getDomainMap(),
Aprime->getRangeMap());
305 const std::string& label)
318 std::string
prefix = std::string(
"TpetraExt ") + label + std::string(
": ");
334 prefix <<
"ERROR, inner dimensions of op(A) and op(B) "
335 "must match for matrix-matrix product. op(A) is "
342 const LO blocksize =
A->getBlockSize();
344 prefix <<
"ERROR, Blocksizes do not match. A.blocksize = " <<
345 blocksize <<
", B.blocksize = " <<
B->getBlockSize() );
365 MMdetails::import_and_extract_views(*
B,
targetMap_B,
Bview,
A->getGraph()->getImporter(),
366 A->getGraph()->getImporter().is_null());
382 const std::string& label,
383 const Teuchos::RCP<Teuchos::ParameterList>&
params)
395#ifdef HAVE_TPETRA_MMM_TIMINGS
396 std::string
prefix_mmm = std::string(
"TpetraExt ")+ label + std::string(
": ");
397 using Teuchos::TimeMonitor;
401 const std::string
prefix =
"TpetraExt::MatrixMatrix::Jacobi(): ";
420 prefix <<
"ERROR, inner dimensions of op(A) and op(B) "
421 "must match for matrix-matrix product. op(A) is "
429 prefix <<
"ERROR, dimensions of result C must "
430 "match dimensions of op(A) * op(B). C has "<<
C.getGlobalNumRows()
431 <<
" rows, should have at least "<<
Aouter << std::endl);
453#ifdef HAVE_TPETRA_MMM_TIMINGS
462 importParams1->set(
"compute global constants",
params->get(
"compute global constants: temporaries",
false));
464 auto slist =
params->sublist(
"matrixmatrix: kernel params",
false);
468 bool isMM =
slist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
472 ip1slist.set(
"isMatrixMatrix_TransferAndFillComplete",
isMM);
490 importParams2->set(
"compute global constants",
params->get(
"compute global constants: temporaries",
false));
492 auto slist =
params->sublist(
"matrixmatrix: kernel params",
false);
494 bool isMM =
slist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
500 ip2slist.set(
"isMatrixMatrix_TransferAndFillComplete",
isMM);
506#ifdef HAVE_TPETRA_MMM_TIMINGS
515 bool newFlag = !
C.getGraph()->isLocallyIndexed() && !
C.getGraph()->isGloballyIndexed();
530 typedef Teuchos::ScalarTraits<Scalar> STS;
531 typename STS::magnitudeType
threshold =
params->get(
"remove zeros threshold", STS::magnitude(STS::zero()));
549 using Teuchos::Array;
559 const std::string
prefix =
"TpetraExt::MatrixMatrix::Add(): ";
562 prefix <<
"ERROR, input matrix A.isFillComplete() is false; it is required to be true. "
563 "(Result matrix B is not required to be isFillComplete()).");
565 prefix <<
"ERROR, input matrix B must not be fill complete!");
567 prefix <<
"ERROR, input matrix B must not have static graph!");
569 prefix <<
"ERROR, input matrix B must not be locally indexed!");
571 using Teuchos::ParameterList;
585 typename crs_matrix_type::nonconst_global_inds_host_view_type
a_inds(
"a_inds",
A.getLocalMaxNumRowEntries());
586 typename crs_matrix_type::nonconst_values_host_view_type
a_vals(
"a_vals",
A.getLocalMaxNumRowEntries());
589 if (
scalarB != Teuchos::ScalarTraits<SC>::one())
594 if (
scalarA != Teuchos::ScalarTraits<SC>::zero()) {
596 row =
B.getRowMap()->getGlobalElement(
i);
599 if (
scalarA != Teuchos::ScalarTraits<SC>::one())
615Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
624 const Teuchos::RCP<Teuchos::ParameterList>&
params)
627 using Teuchos::rcpFromRef;
629 using Teuchos::ParameterList;
634 params->isParameter(
"Call fillComplete") && !
params->get<
bool>(
"Call fillComplete"),
635 std::invalid_argument,
636 "Tpetra::MatrixMatrix::add(): this version of add() always calls fillComplete\n"
637 "on the result, but you explicitly set 'Call fillComplete' = false in the parameter list. Don't set this explicitly.");
638 params->set(
"Call fillComplete",
true);
650 (!
A.isFillComplete () || !
Brcp->isFillComplete (), std::invalid_argument,
651 "TpetraExt::MatrixMatrix::add(): A and B must both be fill complete.");
662template<
class LO,
class GO,
class LOView,
class GOView,
class LocalMap>
663struct ConvertGlobalToLocalFunctor
669 KOKKOS_FUNCTION
void operator() (
const GO i)
const
671 lids(i) = localColMap.getLocalElement(gids(i));
676 const LocalMap localColMap;
679template <
class Scalar,
693 const Teuchos::RCP<Teuchos::ParameterList>&
params)
697 using Teuchos::rcpFromRef;
698 using Teuchos::rcp_implicit_cast;
699 using Teuchos::rcp_dynamic_cast;
700 using Teuchos::TimeMonitor;
711 using exec_space =
typename crs_graph_type::execution_space;
712 using AddKern = MMdetails::AddKernels<SC,LO,GO,NO>;
713 const char*
prefix_mmm =
"TpetraExt::MatrixMatrix::add: ";
714 constexpr bool debug =
false;
716#ifdef HAVE_TPETRA_MMM_TIMINGS
721 std::ostringstream
os;
722 os <<
"Proc " <<
A.getMap ()->getComm ()->getRank () <<
": "
723 <<
"TpetraExt::MatrixMatrix::add" << std::endl;
724 std::cerr <<
os.str ();
728 (
C.isLocallyIndexed() ||
C.isGloballyIndexed(), std::invalid_argument,
729 prefix_mmm <<
"C must be a 'new' matrix (neither locally nor globally indexed).");
731 (!
A.isFillComplete () || !
B.isFillComplete (), std::invalid_argument,
732 prefix_mmm <<
"A and B must both be fill complete.");
733#ifdef HAVE_TPETRA_DEBUG
735 if (
A.isFillComplete () &&
B.isFillComplete ()) {
738 !
A.getDomainMap()->locallySameAs (*
B.getDomainMap ())) ||
740 !
A.getDomainMap()->isSameAs (*
B.getRangeMap ())) ||
742 !
A.getRangeMap ()->isSameAs (*
B.getDomainMap ()));
744 prefix_mmm <<
"The domain Maps of Op(A) and Op(B) are not the same.");
748 !
A.getRangeMap ()->isSameAs (*
B.getRangeMap ())) ||
750 !
A.getRangeMap ()->isSameAs (*
B.getDomainMap())) ||
752 !
A.getDomainMap()->isSameAs (*
B.getRangeMap ()));
754 prefix_mmm <<
"The range Maps of Op(A) and Op(B) are not the same.");
758 using Teuchos::ParameterList;
766#ifdef HAVE_TPETRA_DEBUG
768 (
Aprime.is_null (), std::logic_error,
770 "Please report this bug to the Tpetra developers.");
777 std::ostringstream
os;
778 os <<
"Proc " <<
A.getMap ()->getComm ()->getRank () <<
": "
779 <<
"Form explicit xpose of B" << std::endl;
780 std::cerr <<
os.str ();
785#ifdef HAVE_TPETRA_DEBUG
787 prefix_mmm <<
"Failed to compute Op(B). Please report this bug to the Tpetra developers.");
789 !
Aprime->isFillComplete () || !
Bprime->isFillComplete (), std::invalid_argument,
790 prefix_mmm <<
"Aprime and Bprime must both be fill complete. "
791 "Please report this bug to the Tpetra developers.");
805 typedef typename AddKern::values_array values_array;
806 typedef typename AddKern::row_ptrs_array row_ptrs_array;
807 typedef typename AddKern::col_inds_array col_inds_array;
813#ifdef HAVE_TPETRA_MMM_TIMINGS
817 if(!(
Aprime->getRowMap()->isSameAs(*(
Bprime->getRowMap()))))
820 auto import = rcp(new import_type(Aprime->getRowMap(), Bprime->getRowMap()));
832 if(Teuchos::nonnull(
params) &&
params->isParameter(
"Call fillComplete"))
846 rowptrs = row_ptrs_array(
"C rowptrs", 0);
852 using global_col_inds_array =
typename AddKern::global_col_inds_array;
853#ifdef HAVE_TPETRA_MMM_TIMINGS
862 std::ostringstream
os;
863 os <<
"Proc " <<
A.getMap ()->getComm ()->getRank () <<
": "
864 <<
"Call AddKern::convertToGlobalAndAdd(...)" << std::endl;
865 std::cerr <<
os.str ();
867 AddKern::convertToGlobalAndAdd(
871 std::ostringstream
os;
872 os <<
"Proc " <<
A.getMap ()->getComm ()->getRank () <<
": "
873 <<
"Finished AddKern::convertToGlobalAndAdd(...)" << std::endl;
874 std::cerr <<
os.str ();
876#ifdef HAVE_TPETRA_MMM_TIMINGS
881 Tpetra::Details::makeColMap<LocalOrdinal, GlobalOrdinal, Node>
885 Kokkos::parallel_for(Kokkos::RangePolicy<exec_space>(0,
globalColinds.extent(0)),
887 col_inds_array, global_col_inds_array,
888 typename map_type::local_map_type>
890 KokkosSparse::sort_crs_matrix<exec_space, row_ptrs_array, col_inds_array, values_array>(
rowptrs,
localColinds,
vals);
901 auto Arowptrs =
Alocal.graph.row_map;
902 auto Browptrs =
Blocal.graph.row_map;
903 auto Acolinds =
Alocal.graph.entries;
904 auto Bcolinds =
Blocal.graph.entries;
908#ifdef HAVE_TPETRA_MMM_TIMINGS
913 std::ostringstream
os;
914 os <<
"Proc " <<
A.getMap ()->getComm ()->getRank () <<
": "
915 <<
"Call AddKern::addSorted(...)" << std::endl;
916 std::cerr <<
os.str ();
918 AddKern::addSorted(
Avals, Arowptrs, Acolinds,
alpha,
Bvals, Browptrs, Bcolinds,
beta,
vals,
rowptrs,
colinds);
923#ifdef HAVE_TPETRA_MMM_TIMINGS
928 std::ostringstream
os;
929 os <<
"Proc " <<
A.getMap ()->getComm ()->getRank () <<
": "
930 <<
"Call AddKern::addUnsorted(...)" << std::endl;
931 std::cerr <<
os.str ();
933 AddKern::addUnsorted(
Avals, Arowptrs, Acolinds,
alpha,
Bvals, Browptrs, Bcolinds,
beta,
Aprime->getGlobalNumCols(),
vals,
rowptrs,
colinds);
941 #ifdef HAVE_TPETRA_MMM_TIMINGS
948 std::ostringstream
os;
949 os <<
"Proc " <<
A.getMap ()->getComm ()->getRank () <<
": "
950 <<
"Create Cimport" << std::endl;
951 std::cerr <<
os.str ();
958 std::ostringstream
os;
959 os <<
"Proc " <<
A.getMap ()->getComm ()->getRank () <<
": "
960 <<
"Create Cexport" << std::endl;
961 std::cerr <<
os.str ();
967 std::ostringstream
os;
968 os <<
"Proc " <<
A.getMap ()->getComm ()->getRank () <<
": "
969 <<
"Call C->expertStaticFillComplete(...)" << std::endl;
970 std::cerr <<
os.str ();
990 using Teuchos::Array;
991 using Teuchos::ArrayRCP;
992 using Teuchos::ArrayView;
995 using Teuchos::rcp_dynamic_cast;
996 using Teuchos::rcpFromRef;
997 using Teuchos::tuple;
1000 typedef Teuchos::ScalarTraits<Scalar> STS;
1008 std::string
prefix =
"TpetraExt::MatrixMatrix::Add(): ";
1011 prefix <<
"The case C == null does not actually work. Fixing this will require an interface change.");
1014 !
A.isFillComplete () || !
B.isFillComplete (), std::invalid_argument,
1015 prefix <<
"Both input matrices must be fill complete before calling this function.");
1017#ifdef HAVE_TPETRA_DEBUG
1024 prefix <<
"The domain Maps of Op(A) and Op(B) are not the same.");
1031 prefix <<
"The range Maps of Op(A) and Op(B) are not the same.");
1035 using Teuchos::ParameterList;
1049#ifdef HAVE_TPETRA_DEBUG
1051 prefix <<
"Failed to compute Op(A). Please report this bug to the Tpetra developers.");
1064#ifdef HAVE_TPETRA_DEBUG
1066 prefix <<
"Failed to compute Op(B). Please report this bug to the Tpetra developers.");
1070 if (!
C.is_null ()) {
1071 C->setAllToScalar (STS::zero ());
1076 C =
rcp (
new crs_matrix_type (
Aprime->getRowMap (), 0));
1079#ifdef HAVE_TPETRA_DEBUG
1081 prefix <<
"At this point, Aprime is null. Please report this bug to the Tpetra developers.");
1083 prefix <<
"At this point, Bprime is null. Please report this bug to the Tpetra developers.");
1085 prefix <<
"At this point, C is null. Please report this bug to the Tpetra developers.");
1093 for (
int k = 0;
k < 2; ++
k) {
1094 typename crs_matrix_type::nonconst_global_inds_host_view_type
Indices;
1095 typename crs_matrix_type::nonconst_values_host_view_type
Values;
1103#ifdef HAVE_TPETRA_DEBUG
1105 prefix <<
"At this point, curRowMap is null. Please report this bug to the Tpetra developers.");
1108#ifdef HAVE_TPETRA_DEBUG
1110 prefix <<
"At this point, curRowMap is null. Please report this bug to the Tpetra developers.");
1116 size_t numEntries =
Mat[
k]->getNumEntriesInGlobalRow (
globalRow);
1117 if (numEntries > 0) {
1118 Kokkos::resize(
Indices,numEntries);
1119 Kokkos::resize(
Values,numEntries);
1122 if (
scalar[
k] != STS::one ()) {
1123 for (
size_t j = 0;
j < numEntries; ++
j) {
1128 if (
C->isFillComplete ()) {
1129 C->sumIntoGlobalValues (
globalRow, numEntries,
1132 C->insertGlobalValues (
globalRow, numEntries,
1194template<
class Scalar,
1196 class GlobalOrdinal,
1198void mult_AT_B_newmatrix(
1199 const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A,
1200 const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& B,
1201 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
1202 const std::string & label,
1203 const Teuchos::RCP<Teuchos::ParameterList>& params)
1208 typedef LocalOrdinal LO;
1209 typedef GlobalOrdinal GO;
1211 typedef CrsMatrixStruct<SC,LO,GO,NO> crs_matrix_struct_type;
1212 typedef RowMatrixTransposer<SC,LO,GO,NO> transposer_type;
1214#ifdef HAVE_TPETRA_MMM_TIMINGS
1215 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
1216 using Teuchos::TimeMonitor;
1217 RCP<TimeMonitor> MM = rcp (
new TimeMonitor
1218 (*TimeMonitor::getNewTimer (prefix_mmm +
"MMM-T Transpose")));
1224 transposer_type transposer (rcpFromRef (A), label + std::string(
"XP: "));
1226 using Teuchos::ParameterList;
1227 RCP<ParameterList> transposeParams (
new ParameterList);
1228 transposeParams->set (
"sort",
false);
1229 if(! params.is_null ()) {
1230 transposeParams->set (
"compute global constants",
1231 params->get (
"compute global constants: temporaries",
1234 RCP<Tpetra::CrsMatrix<SC, LO, GO, NO>> Atrans =
1235 transposer.createTransposeLocal (transposeParams);
1240#ifdef HAVE_TPETRA_MMM_TIMINGS
1242 MM = rcp (
new TimeMonitor
1243 (*TimeMonitor::getNewTimer (prefix_mmm + std::string (
"MMM-T I&X"))));
1247 crs_matrix_struct_type Aview;
1248 crs_matrix_struct_type Bview;
1249 RCP<const Import<LO, GO, NO> > dummyImporter;
1252 RCP<Teuchos::ParameterList> importParams1 (
new ParameterList);
1253 if (! params.is_null ()) {
1254 importParams1->set (
"compute global constants",
1255 params->get (
"compute global constants: temporaries",
1257 auto slist = params->sublist (
"matrixmatrix: kernel params",
false);
1258 bool isMM = slist.get (
"isMatrixMatrix_TransferAndFillComplete",
false);
1259 bool overrideAllreduce = slist.get(
"MM_TAFC_OverrideAllreduceCheck",
false);
1260 int mm_optimization_core_count =
1262 mm_optimization_core_count =
1263 slist.get (
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
1264 int mm_optimization_core_count2 =
1265 params->get (
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
1266 if (mm_optimization_core_count2 < mm_optimization_core_count) {
1267 mm_optimization_core_count = mm_optimization_core_count2;
1269 auto & sip1 = importParams1->sublist (
"matrixmatrix: kernel params",
false);
1270 sip1.set (
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
1271 sip1.set (
"isMatrixMatrix_TransferAndFillComplete", isMM);
1272 sip1.set (
"MM_TAFC_OverrideAllreduceCheck", overrideAllreduce);
1275 MMdetails::import_and_extract_views (*Atrans, Atrans->getRowMap (),
1276 Aview, dummyImporter,
true,
1277 label, importParams1);
1279 RCP<ParameterList> importParams2 (
new ParameterList);
1280 if (! params.is_null ()) {
1281 importParams2->set (
"compute global constants",
1282 params->get (
"compute global constants: temporaries",
1284 auto slist = params->sublist (
"matrixmatrix: kernel params",
false);
1285 bool isMM = slist.get (
"isMatrixMatrix_TransferAndFillComplete",
false);
1286 bool overrideAllreduce = slist.get (
"MM_TAFC_OverrideAllreduceCheck",
false);
1287 int mm_optimization_core_count =
1289 mm_optimization_core_count =
1290 slist.get (
"MM_TAFC_OptimizationCoreCount",
1291 mm_optimization_core_count);
1292 int mm_optimization_core_count2 =
1293 params->get (
"MM_TAFC_OptimizationCoreCount",
1294 mm_optimization_core_count);
1295 if (mm_optimization_core_count2 < mm_optimization_core_count) {
1296 mm_optimization_core_count = mm_optimization_core_count2;
1298 auto & sip2 = importParams2->sublist (
"matrixmatrix: kernel params",
false);
1299 sip2.set (
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
1300 sip2.set (
"isMatrixMatrix_TransferAndFillComplete", isMM);
1301 sip2.set (
"MM_TAFC_OverrideAllreduceCheck", overrideAllreduce);
1304 if(B.getRowMap()->isSameAs(*Atrans->getColMap())){
1305 MMdetails::import_and_extract_views(B, B.getRowMap(), Bview, dummyImporter,
true, label,importParams2);
1308 MMdetails::import_and_extract_views(B, Atrans->getColMap(), Bview, dummyImporter,
false, label,importParams2);
1311#ifdef HAVE_TPETRA_MMM_TIMINGS
1313 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM-T AB-core"))));
1316 RCP<Tpetra::CrsMatrix<SC, LO, GO, NO>> Ctemp;
1319 bool needs_final_export = ! Atrans->getGraph ()->getExporter ().is_null();
1320 if (needs_final_export) {
1324 Ctemp = rcp (&C,
false);
1327 mult_A_B_newmatrix(Aview, Bview, *Ctemp, label,params);
1332#ifdef HAVE_TPETRA_MMM_TIMINGS
1334 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM-T exportAndFillComplete"))));
1337 RCP<Tpetra::CrsMatrix<SC, LO, GO, NO>> Crcp (&C,
false);
1339 if (needs_final_export) {
1340 ParameterList labelList;
1341 labelList.set(
"Timer Label", label);
1342 if(!params.is_null()) {
1343 ParameterList& params_sublist = params->sublist(
"matrixmatrix: kernel params",
false);
1344 ParameterList& labelList_subList = labelList.sublist(
"matrixmatrix: kernel params",
false);
1346 mm_optimization_core_count = params_sublist.get(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
1347 int mm_optimization_core_count2 = params->get(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
1348 if(mm_optimization_core_count2<mm_optimization_core_count) mm_optimization_core_count=mm_optimization_core_count2;
1349 labelList_subList.set(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count,
"Core Count above which the optimized neighbor discovery is used");
1350 bool isMM = params_sublist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
1351 bool overrideAllreduce = params_sublist.get(
"MM_TAFC_OverrideAllreduceCheck",
false);
1353 labelList_subList.set (
"isMatrixMatrix_TransferAndFillComplete", isMM,
1354 "This parameter should be set to true only for MatrixMatrix operations: the optimization in Epetra that was ported to Tpetra does _not_ take into account the possibility that for any given source PID, a particular GID may not exist on the target PID: i.e. a transfer operation. A fix for this general case is in development.");
1355 labelList.set(
"compute global constants",params->get(
"compute global constants",
true));
1356 labelList.set(
"MM_TAFC_OverrideAllreduceCheck",overrideAllreduce);
1358 Ctemp->exportAndFillComplete (Crcp,
1359 *Ctemp->getGraph ()->getExporter (),
1362 rcp (&labelList,
false));
1364#ifdef HAVE_TPETRA_MMM_STATISTICS
1365 printMultiplicationStatistics(Ctemp->getGraph()->getExporter(), label+std::string(
" AT_B MMM"));
1371template<
class Scalar,
1373 class GlobalOrdinal,
1376 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
1377 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
1378 CrsWrapper<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
1379 const std::string& ,
1380 const Teuchos::RCP<Teuchos::ParameterList>& )
1382 using Teuchos::Array;
1383 using Teuchos::ArrayRCP;
1384 using Teuchos::ArrayView;
1385 using Teuchos::OrdinalTraits;
1386 using Teuchos::null;
1388 typedef Teuchos::ScalarTraits<Scalar> STS;
1390 LocalOrdinal C_firstCol = Bview.
colMap->getMinLocalIndex();
1391 LocalOrdinal C_lastCol = Bview.colMap->getMaxLocalIndex();
1393 LocalOrdinal C_firstCol_import = OrdinalTraits<LocalOrdinal>::zero();
1394 LocalOrdinal C_lastCol_import = OrdinalTraits<LocalOrdinal>::invalid();
1396 ArrayView<const GlobalOrdinal> bcols = Bview.colMap->getLocalElementList();
1397 ArrayView<const GlobalOrdinal> bcols_import = null;
1398 if (Bview.importColMap != null) {
1399 C_firstCol_import = Bview.importColMap->getMinLocalIndex();
1400 C_lastCol_import = Bview.importColMap->getMaxLocalIndex();
1402 bcols_import = Bview.importColMap->getLocalElementList();
1405 size_t C_numCols = C_lastCol - C_firstCol +
1406 OrdinalTraits<LocalOrdinal>::one();
1407 size_t C_numCols_import = C_lastCol_import - C_firstCol_import +
1408 OrdinalTraits<LocalOrdinal>::one();
1410 if (C_numCols_import > C_numCols)
1411 C_numCols = C_numCols_import;
1413 Array<Scalar> dwork = Array<Scalar>(C_numCols);
1414 Array<GlobalOrdinal> iwork = Array<GlobalOrdinal>(C_numCols);
1415 Array<size_t> iwork2 = Array<size_t>(C_numCols);
1417 Array<Scalar> C_row_i = dwork;
1418 Array<GlobalOrdinal> C_cols = iwork;
1419 Array<size_t> c_index = iwork2;
1420 Array<GlobalOrdinal> combined_index = Array<GlobalOrdinal>(2*C_numCols);
1421 Array<Scalar> combined_values = Array<Scalar>(2*C_numCols);
1423 size_t C_row_i_length, j, k, last_index;
1426 LocalOrdinal LO_INVALID = OrdinalTraits<LocalOrdinal>::invalid();
1427 Array<LocalOrdinal> Acol2Brow(Aview.colMap->getLocalNumElements(),LO_INVALID);
1428 Array<LocalOrdinal> Acol2Irow(Aview.colMap->getLocalNumElements(),LO_INVALID);
1429 if(Aview.colMap->isSameAs(*Bview.origMatrix->getRowMap())){
1431 for(LocalOrdinal i=Aview.colMap->getMinLocalIndex(); i <=
1432 Aview.colMap->getMaxLocalIndex(); i++)
1437 for(LocalOrdinal i=Aview.colMap->getMinLocalIndex(); i <=
1438 Aview.colMap->getMaxLocalIndex(); i++) {
1439 GlobalOrdinal GID = Aview.colMap->getGlobalElement(i);
1440 LocalOrdinal BLID = Bview.origMatrix->getRowMap()->getLocalElement(GID);
1441 if(BLID != LO_INVALID) Acol2Brow[i] = BLID;
1442 else Acol2Irow[i] = Bview.importMatrix->getRowMap()->getLocalElement(GID);
1452 auto Arowptr = Aview.origMatrix->getLocalRowPtrsHost();
1453 auto Acolind = Aview.origMatrix->getLocalIndicesHost();
1454 auto Avals = Aview.origMatrix->getLocalValuesHost(Tpetra::Access::ReadOnly);
1455 auto Browptr = Bview.origMatrix->getLocalRowPtrsHost();
1456 auto Bcolind = Bview.origMatrix->getLocalIndicesHost();
1457 auto Bvals = Bview.origMatrix->getLocalValuesHost(Tpetra::Access::ReadOnly);
1458 decltype(Browptr) Irowptr;
1459 decltype(Bcolind) Icolind;
1460 decltype(Bvals) Ivals;
1461 if(!Bview.importMatrix.is_null()) {
1462 Irowptr = Bview.importMatrix->getLocalRowPtrsHost();
1463 Icolind = Bview.importMatrix->getLocalIndicesHost();
1464 Ivals = Bview.importMatrix->getLocalValuesHost(Tpetra::Access::ReadOnly);
1467 bool C_filled = C.isFillComplete();
1469 for (
size_t i = 0; i < C_numCols; i++)
1470 c_index[i] = OrdinalTraits<size_t>::invalid();
1473 size_t Arows = Aview.rowMap->getLocalNumElements();
1474 for(
size_t i=0; i<Arows; ++i) {
1478 GlobalOrdinal global_row = Aview.rowMap->getGlobalElement(i);
1484 C_row_i_length = OrdinalTraits<size_t>::zero();
1486 for (k = Arowptr[i]; k < Arowptr[i+1]; ++k) {
1487 LocalOrdinal Ak = Acol2Brow[Acolind[k]];
1488 const Scalar Aval = Avals[k];
1489 if (Aval == STS::zero())
1492 if (Ak == LO_INVALID)
1495 for (j = Browptr[Ak]; j < Browptr[Ak+1]; ++j) {
1496 LocalOrdinal col = Bcolind[j];
1499 if (c_index[col] == OrdinalTraits<size_t>::invalid()){
1502 C_row_i[C_row_i_length] = Aval*Bvals[j];
1503 C_cols[C_row_i_length] = col;
1504 c_index[col] = C_row_i_length;
1509 C_row_i[c_index[col]] += Aval *
static_cast<Scalar
>(Bvals[j]);
1514 for (
size_t ii = 0; ii < C_row_i_length; ii++) {
1515 c_index[C_cols[ii]] = OrdinalTraits<size_t>::invalid();
1516 C_cols[ii] = bcols[C_cols[ii]];
1517 combined_index[ii] = C_cols[ii];
1518 combined_values[ii] = C_row_i[ii];
1520 last_index = C_row_i_length;
1526 C_row_i_length = OrdinalTraits<size_t>::zero();
1528 for (k = Arowptr[i]; k < Arowptr[i+1]; ++k) {
1529 LocalOrdinal Ak = Acol2Brow[Acolind[k]];
1530 const Scalar Aval = Avals[k];
1531 if (Aval == STS::zero())
1534 if (Ak!=LO_INVALID)
continue;
1536 Ak = Acol2Irow[Acolind[k]];
1537 for (j = Irowptr[Ak]; j < Irowptr[Ak+1]; ++j) {
1538 LocalOrdinal col = Icolind[j];
1541 if (c_index[col] == OrdinalTraits<size_t>::invalid()) {
1544 C_row_i[C_row_i_length] = Aval*Ivals[j];
1545 C_cols[C_row_i_length] = col;
1546 c_index[col] = C_row_i_length;
1552 C_row_i[c_index[col]] += Aval *
static_cast<Scalar
>(Ivals[j]);
1557 for (
size_t ii = 0; ii < C_row_i_length; ii++) {
1558 c_index[C_cols[ii]] = OrdinalTraits<size_t>::invalid();
1559 C_cols[ii] = bcols_import[C_cols[ii]];
1560 combined_index[last_index] = C_cols[ii];
1561 combined_values[last_index] = C_row_i[ii];
1568 C.sumIntoGlobalValues(
1570 combined_index.view(OrdinalTraits<size_t>::zero(), last_index),
1571 combined_values.view(OrdinalTraits<size_t>::zero(), last_index))
1573 C.insertGlobalValues(
1575 combined_index.view(OrdinalTraits<size_t>::zero(), last_index),
1576 combined_values.view(OrdinalTraits<size_t>::zero(), last_index));
1582template<
class Scalar,
1584 class GlobalOrdinal,
1586void setMaxNumEntriesPerRow(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Mview) {
1587 typedef typename Teuchos::Array<Teuchos::ArrayView<const LocalOrdinal> >::size_type local_length_size;
1588 Mview.maxNumRowEntries = Teuchos::OrdinalTraits<local_length_size>::zero();
1590 if (Mview.indices.size() > Teuchos::OrdinalTraits<local_length_size>::zero()) {
1591 Mview.maxNumRowEntries = Mview.indices[0].size();
1593 for (local_length_size i = 1; i < Mview.indices.size(); ++i)
1594 if (Mview.indices[i].size() > Mview.maxNumRowEntries)
1595 Mview.maxNumRowEntries = Mview.indices[i].size();
1600template<
class CrsMatrixType>
1601size_t C_estimate_nnz(CrsMatrixType & A, CrsMatrixType &B){
1603 size_t Aest = 100, Best=100;
1604 if (A.getLocalNumEntries() >= A.getLocalNumRows())
1605 Aest = (A.getLocalNumRows() > 0) ? A.getLocalNumEntries()/A.getLocalNumRows() : 100;
1606 if (B.getLocalNumEntries() >= B.getLocalNumRows())
1607 Best = (B.getLocalNumRows() > 0) ? B.getLocalNumEntries()/B.getLocalNumRows() : 100;
1609 size_t nnzperrow = (size_t)(sqrt((
double)Aest) + sqrt((
double)Best) - 1);
1610 nnzperrow *= nnzperrow;
1612 return (
size_t)(A.getLocalNumRows()*nnzperrow*0.75 + 100);
1622template<
class Scalar,
1624 class GlobalOrdinal,
1626void mult_A_B_newmatrix(
1627 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
1628 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
1629 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
1630 const std::string& label,
1631 const Teuchos::RCP<Teuchos::ParameterList>& params)
1633 using Teuchos::Array;
1634 using Teuchos::ArrayRCP;
1635 using Teuchos::ArrayView;
1640 typedef LocalOrdinal LO;
1641 typedef GlobalOrdinal GO;
1643 typedef Import<LO,GO,NO> import_type;
1644 typedef Map<LO,GO,NO> map_type;
1647 typedef typename map_type::local_map_type local_map_type;
1649 typedef typename KCRS::StaticCrsGraphType graph_t;
1650 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
1651 typedef typename NO::execution_space execution_space;
1652 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
1653 typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
1655#ifdef HAVE_TPETRA_MMM_TIMINGS
1656 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
1657 using Teuchos::TimeMonitor;
1658 RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*(TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM M5 Cmap")))));
1660 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1663 RCP<const import_type> Cimport;
1664 RCP<const map_type> Ccolmap;
1665 RCP<const import_type> Bimport = Bview.origMatrix->getGraph()->getImporter();
1666 RCP<const import_type> Iimport = Bview.importMatrix.is_null() ?
1667 Teuchos::null : Bview.importMatrix->getGraph()->getImporter();
1668 local_map_type Acolmap_local = Aview.colMap->getLocalMap();
1669 local_map_type Browmap_local = Bview.origMatrix->getRowMap()->getLocalMap();
1670 local_map_type Irowmap_local;
if(!Bview.importMatrix.is_null()) Irowmap_local = Bview.importMatrix->getRowMap()->getLocalMap();
1671 local_map_type Bcolmap_local = Bview.origMatrix->getColMap()->getLocalMap();
1672 local_map_type Icolmap_local;
if(!Bview.importMatrix.is_null()) Icolmap_local = Bview.importMatrix->getColMap()->getLocalMap();
1679 lo_view_t Bcol2Ccol(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"),Bview.colMap->getLocalNumElements()), Icol2Ccol;
1681 if (Bview.importMatrix.is_null()) {
1684 Ccolmap = Bview.colMap;
1685 const LO colMapSize =
static_cast<LO
>(Bview.colMap->getLocalNumElements());
1687 Kokkos::parallel_for(
"Tpetra::mult_A_B_newmatrix::Bcol2Ccol_fill",
1688 Kokkos::RangePolicy<execution_space, LO>(0, colMapSize),
1689 KOKKOS_LAMBDA(
const LO i) {
1702 if (!Bimport.is_null() && !Iimport.is_null()) {
1703 Cimport = Bimport->setUnion(*Iimport);
1705 else if (!Bimport.is_null() && Iimport.is_null()) {
1706 Cimport = Bimport->setUnion();
1708 else if (Bimport.is_null() && !Iimport.is_null()) {
1709 Cimport = Iimport->setUnion();
1712 throw std::runtime_error(
"TpetraExt::MMM status of matrix importers is nonsensical");
1714 Ccolmap = Cimport->getTargetMap();
1719 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.origMatrix->getDomainMap()),
1720 std::runtime_error,
"Tpetra::MMM: Import setUnion messed with the DomainMap in an unfortunate way");
1727 Kokkos::resize(Icol2Ccol,Bview.importMatrix->getColMap()->getLocalNumElements());
1728 local_map_type Ccolmap_local = Ccolmap->getLocalMap();
1729 Kokkos::parallel_for(
"Tpetra::mult_A_B_newmatrix::Bcol2Ccol_getGlobalElement",range_type(0,Bview.origMatrix->getColMap()->getLocalNumElements()),KOKKOS_LAMBDA(
const LO i) {
1730 Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Bcolmap_local.getGlobalElement(i));
1732 Kokkos::parallel_for(
"Tpetra::mult_A_B_newmatrix::Icol2Ccol_getGlobalElement",range_type(0,Bview.importMatrix->getColMap()->getLocalNumElements()),KOKKOS_LAMBDA(
const LO i) {
1733 Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
1742 C.replaceColMap(Ccolmap);
1760 lo_view_t targetMapToOrigRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"),Aview.colMap->getLocalNumElements());
1761 lo_view_t targetMapToImportRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"),Aview.colMap->getLocalNumElements());
1763 Kokkos::parallel_for(
"Tpetra::mult_A_B_newmatrix::construct_tables",range_type(Aview.colMap->getMinLocalIndex(), Aview.colMap->getMaxLocalIndex()+1),KOKKOS_LAMBDA(
const LO i) {
1764 GO aidx = Acolmap_local.getGlobalElement(i);
1765 LO B_LID = Browmap_local.getLocalElement(aidx);
1766 if (B_LID != LO_INVALID) {
1767 targetMapToOrigRow(i) = B_LID;
1768 targetMapToImportRow(i) = LO_INVALID;
1770 LO I_LID = Irowmap_local.getLocalElement(aidx);
1771 targetMapToOrigRow(i) = LO_INVALID;
1772 targetMapToImportRow(i) = I_LID;
1779 KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Node,lo_view_t>::mult_A_B_newmatrix_kernel_wrapper(Aview,Bview,targetMapToOrigRow,targetMapToImportRow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
1785template<
class Scalar,
1787 class GlobalOrdinal,
1789void mult_A_B_newmatrix(BlockCrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
1790 BlockCrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
1791 Teuchos::RCP<BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > &C)
1793 using Teuchos::null;
1794 using Teuchos::Array;
1795 using Teuchos::ArrayRCP;
1796 using Teuchos::ArrayView;
1801 typedef LocalOrdinal LO;
1802 typedef GlobalOrdinal GO;
1804 typedef Import<LO,GO,NO> import_type;
1805 typedef Map<LO,GO,NO> map_type;
1806 typedef BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> block_crs_matrix_type;
1807 typedef typename block_crs_matrix_type::crs_graph_type graph_t;
1810 typedef typename map_type::local_map_type local_map_type;
1811 typedef typename block_crs_matrix_type::local_matrix_device_type KBSR;
1812 typedef typename KBSR::device_type device_t;
1813 typedef typename KBSR::StaticCrsGraphType static_graph_t;
1814 typedef typename static_graph_t::row_map_type::non_const_type lno_view_t;
1815 typedef typename static_graph_t::entries_type::non_const_type lno_nnz_view_t;
1816 typedef typename KBSR::values_type::non_const_type scalar_view_t;
1817 typedef typename NO::execution_space execution_space;
1818 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
1819 typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
1821 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1824 RCP<const import_type> Cimport;
1825 RCP<const map_type> Ccolmap;
1826 RCP<const import_type> Bimport = Bview.origMatrix->getGraph()->getImporter();
1827 RCP<const import_type> Iimport = Bview.importMatrix.is_null() ?
1828 Teuchos::null : Bview.importMatrix->getGraph()->getImporter();
1829 local_map_type Acolmap_local = Aview.colMap->getLocalMap();
1830 local_map_type Browmap_local = Bview.origMatrix->getRowMap()->getLocalMap();
1831 local_map_type Irowmap_local;
if(!Bview.importMatrix.is_null()) Irowmap_local = Bview.importMatrix->getRowMap()->getLocalMap();
1832 local_map_type Bcolmap_local = Bview.origMatrix->getColMap()->getLocalMap();
1833 local_map_type Icolmap_local;
if(!Bview.importMatrix.is_null()) Icolmap_local = Bview.importMatrix->getColMap()->getLocalMap();
1840 lo_view_t Bcol2Ccol(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"),Bview.colMap->getLocalNumElements()), Icol2Ccol;
1842 if (Bview.importMatrix.is_null()) {
1845 Ccolmap = Bview.colMap;
1846 const LO colMapSize =
static_cast<LO
>(Bview.colMap->getLocalNumElements());
1848 Kokkos::parallel_for(
"Tpetra::mult_A_B_newmatrix::Bcol2Ccol_fill",
1849 Kokkos::RangePolicy<execution_space, LO>(0, colMapSize),
1850 KOKKOS_LAMBDA(
const LO i) {
1863 if (!Bimport.is_null() && !Iimport.is_null()) {
1864 Cimport = Bimport->setUnion(*Iimport);
1866 else if (!Bimport.is_null() && Iimport.is_null()) {
1867 Cimport = Bimport->setUnion();
1869 else if (Bimport.is_null() && !Iimport.is_null()) {
1870 Cimport = Iimport->setUnion();
1873 throw std::runtime_error(
"TpetraExt::MMM status of matrix importers is nonsensical");
1875 Ccolmap = Cimport->getTargetMap();
1882 Kokkos::resize(Icol2Ccol,Bview.importMatrix->getColMap()->getLocalNumElements());
1883 local_map_type Ccolmap_local = Ccolmap->getLocalMap();
1884 Kokkos::parallel_for(
"Tpetra::mult_A_B_newmatrix::Bcol2Ccol_getGlobalElement",
1885 range_type(0,Bview.origMatrix->getColMap()->getLocalNumElements()),
1886 KOKKOS_LAMBDA(
const LO i) {
1887 Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Bcolmap_local.getGlobalElement(i));
1889 Kokkos::parallel_for(
"Tpetra::mult_A_B_newmatrix::Icol2Ccol_getGlobalElement",
1890 range_type(0,Bview.importMatrix->getColMap()->getLocalNumElements()),
1891 KOKKOS_LAMBDA(
const LO i) {
1892 Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
1912 lo_view_t targetMapToOrigRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"),
1913 Aview.colMap->getLocalNumElements());
1914 lo_view_t targetMapToImportRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"),
1915 Aview.colMap->getLocalNumElements());
1917 Kokkos::parallel_for(
"Tpetra::mult_A_B_newmatrix::construct_tables",
1918 range_type(Aview.colMap->getMinLocalIndex(), Aview.colMap->getMaxLocalIndex()+1),
1919 KOKKOS_LAMBDA(
const LO i) {
1920 GO aidx = Acolmap_local.getGlobalElement(i);
1921 LO B_LID = Browmap_local.getLocalElement(aidx);
1922 if (B_LID != LO_INVALID) {
1923 targetMapToOrigRow(i) = B_LID;
1924 targetMapToImportRow(i) = LO_INVALID;
1926 LO I_LID = Irowmap_local.getLocalElement(aidx);
1927 targetMapToOrigRow(i) = LO_INVALID;
1928 targetMapToImportRow(i) = I_LID;
1933 using KernelHandle =
1934 KokkosKernels::Experimental::KokkosKernelsHandle<
typename lno_view_t::const_value_type,
1935 typename lno_nnz_view_t::const_value_type,
1936 typename scalar_view_t::const_value_type,
1937 typename device_t::execution_space,
1938 typename device_t::memory_space,
1939 typename device_t::memory_space>;
1940 int team_work_size = 16;
1941 std::string myalg(
"SPGEMM_KK_MEMORY");
1942 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
1945 kh.create_spgemm_handle(alg_enum);
1946 kh.set_team_work_size(team_work_size);
1949 const KBSR Amat = Aview.origMatrix->getLocalMatrixDevice();
1950 const KBSR Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,
1951 targetMapToOrigRow,targetMapToImportRow,
1952 Bcol2Ccol,Icol2Ccol,
1953 Ccolmap.getConst()->getLocalNumElements());
1955 RCP<graph_t> graphC;
1956 typename KBSR::values_type values;
1961 KokkosSparse::block_spgemm_symbolic(kh, Amat,
false, Bmerged,
false, Cmat);
1962 KokkosSparse::block_spgemm_numeric (kh, Amat,
false, Bmerged,
false, Cmat);
1963 kh.destroy_spgemm_handle();
1966 graphC = rcp(
new graph_t(Cmat.graph, Aview.origMatrix->getRowMap(), Ccolmap.getConst()));
1967 values = Cmat.values;
1969 C = rcp (
new block_crs_matrix_type (*graphC, values, Aview.blocksize));
1975template<
class Scalar,
1977 class GlobalOrdinal,
1979 class LocalOrdinalViewType>
1980void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
1981 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
1982 const LocalOrdinalViewType & targetMapToOrigRow,
1983 const LocalOrdinalViewType & targetMapToImportRow,
1984 const LocalOrdinalViewType & Bcol2Ccol,
1985 const LocalOrdinalViewType & Icol2Ccol,
1986 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
1987 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Node> > Cimport,
1988 const std::string& label,
1989 const Teuchos::RCP<Teuchos::ParameterList>& params) {
1990#ifdef HAVE_TPETRA_MMM_TIMINGS
1991 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
1992 using Teuchos::TimeMonitor;
1993 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SerialCore"))));
1996 using Teuchos::Array;
1997 using Teuchos::ArrayRCP;
1998 using Teuchos::ArrayView;
2003 typedef typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_host_type KCRS;
2004 typedef typename KCRS::StaticCrsGraphType graph_t;
2005 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
2006 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
2007 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
2008 typedef typename KCRS::values_type::non_const_type scalar_view_t;
2011 typedef LocalOrdinal LO;
2012 typedef GlobalOrdinal GO;
2014 typedef Map<LO,GO,NO> map_type;
2015 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2016 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2017 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
2020 RCP<const map_type> Ccolmap = C.getColMap();
2021 size_t m = Aview.origMatrix->getLocalNumRows();
2022 size_t n = Ccolmap->getLocalNumElements();
2023 size_t b_max_nnz_per_row = Bview.origMatrix->getLocalMaxNumRowEntries();
2026 const KCRS & Amat = Aview.origMatrix->getLocalMatrixHost();
2027 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixHost();
2029 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map;
2030 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries;
2031 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
2033 c_lno_view_t Irowptr;
2034 lno_nnz_view_t Icolind;
2035 scalar_view_t Ivals;
2036 if(!Bview.importMatrix.is_null()) {
2037 auto lclB = Bview.importMatrix->getLocalMatrixHost();
2038 Irowptr = lclB.graph.row_map;
2039 Icolind = lclB.graph.entries;
2040 Ivals = lclB.values;
2041 b_max_nnz_per_row = std::max(b_max_nnz_per_row,Bview.importMatrix->getLocalMaxNumRowEntries());
2044#ifdef HAVE_TPETRA_MMM_TIMINGS
2045 RCP<TimeMonitor> MM2 = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SerialCore - Compare"))));
2055 size_t CSR_alloc = std::max(C_estimate_nnz(*Aview.origMatrix, *Bview.origMatrix), n);
2056 lno_view_t Crowptr(Kokkos::ViewAllocateWithoutInitializing(
"Crowptr"),m+1);
2057 lno_nnz_view_t Ccolind(Kokkos::ViewAllocateWithoutInitializing(
"Ccolind"),CSR_alloc);
2058 scalar_view_t Cvals(Kokkos::ViewAllocateWithoutInitializing(
"Cvals"),CSR_alloc);
2068 size_t INVALID = Teuchos::OrdinalTraits<size_t>::invalid();
2069 std::vector<size_t> c_status(n, ST_INVALID);
2079 size_t CSR_ip = 0, OLD_ip = 0;
2080 for (
size_t i = 0; i < m; i++) {
2083 Crowptr[i] = CSR_ip;
2086 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
2087 LO Aik = Acolind[k];
2088 const SC Aval = Avals[k];
2089 if (Aval == SC_ZERO)
2092 if (targetMapToOrigRow[Aik] != LO_INVALID) {
2099 size_t Bk =
static_cast<size_t> (targetMapToOrigRow[Aik]);
2102 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
2103 LO Bkj = Bcolind[j];
2104 LO Cij = Bcol2Ccol[Bkj];
2106 if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip) {
2108 c_status[Cij] = CSR_ip;
2109 Ccolind[CSR_ip] = Cij;
2110 Cvals[CSR_ip] = Aval*Bvals[j];
2114 Cvals[c_status[Cij]] += Aval*Bvals[j];
2125 size_t Ik =
static_cast<size_t> (targetMapToImportRow[Aik]);
2126 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
2127 LO Ikj = Icolind[j];
2128 LO Cij = Icol2Ccol[Ikj];
2130 if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip){
2132 c_status[Cij] = CSR_ip;
2133 Ccolind[CSR_ip] = Cij;
2134 Cvals[CSR_ip] = Aval*Ivals[j];
2137 Cvals[c_status[Cij]] += Aval*Ivals[j];
2144 if (i+1 < m && CSR_ip + std::min(n,(Arowptr[i+2]-Arowptr[i+1])*b_max_nnz_per_row) > CSR_alloc) {
2146 Kokkos::resize(Ccolind,CSR_alloc);
2147 Kokkos::resize(Cvals,CSR_alloc);
2152 Crowptr[m] = CSR_ip;
2155 Kokkos::resize(Ccolind,CSR_ip);
2156 Kokkos::resize(Cvals,CSR_ip);
2158#ifdef HAVE_TPETRA_MMM_TIMINGS
2160 auto MM3(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix Final Sort")));
2164 if (params.is_null() || params->get(
"sort entries",
true))
2165 Import_Util::sortCrsEntries(Crowptr,Ccolind, Cvals);
2166 C.setAllValues(Crowptr,Ccolind, Cvals);
2169#ifdef HAVE_TPETRA_MMM_TIMINGS
2171 auto MM4(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix ESFC")));
2183 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
2184 labelList->set(
"Timer Label",label);
2185 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
2186 RCP<const Export<LO,GO,NO> > dummyExport;
2187 C.expertStaticFillComplete(Bview. origMatrix->getDomainMap(), Aview. origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
2188#ifdef HAVE_TPETRA_MMM_TIMINGS
2190 MM2 = Teuchos::null;
2196template<
class Scalar,
2198 class GlobalOrdinal,
2201 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2202 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2203 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2204 const std::string& label,
2205 const Teuchos::RCP<Teuchos::ParameterList>& params)
2207 using Teuchos::Array;
2208 using Teuchos::ArrayRCP;
2209 using Teuchos::ArrayView;
2214 typedef LocalOrdinal LO;
2215 typedef GlobalOrdinal GO;
2217 typedef Import<LO,GO,NO> import_type;
2218 typedef Map<LO,GO,NO> map_type;
2221 typedef typename map_type::local_map_type local_map_type;
2223 typedef typename KCRS::StaticCrsGraphType graph_t;
2224 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
2225 typedef typename NO::execution_space execution_space;
2226 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
2227 typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
2229#ifdef HAVE_TPETRA_MMM_TIMINGS
2230 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
2231 using Teuchos::TimeMonitor;
2232 RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Reuse Cmap"))));
2234 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2237 RCP<const import_type> Cimport = C.getGraph()->getImporter();
2238 RCP<const map_type> Ccolmap = C.getColMap();
2239 local_map_type Acolmap_local = Aview.colMap->getLocalMap();
2240 local_map_type Browmap_local = Bview.origMatrix->getRowMap()->getLocalMap();
2241 local_map_type Irowmap_local;
if(!Bview.importMatrix.is_null()) Irowmap_local = Bview.importMatrix->getRowMap()->getLocalMap();
2242 local_map_type Bcolmap_local = Bview.origMatrix->getColMap()->getLocalMap();
2243 local_map_type Icolmap_local;
if(!Bview.importMatrix.is_null()) Icolmap_local = Bview.importMatrix->getColMap()->getLocalMap();
2244 local_map_type Ccolmap_local = Ccolmap->getLocalMap();
2247 lo_view_t Bcol2Ccol(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"),Bview.colMap->getLocalNumElements()), Icol2Ccol;
2251 Kokkos::parallel_for(range_type(0,Bview.origMatrix->getColMap()->getLocalNumElements()),KOKKOS_LAMBDA(
const LO i) {
2252 Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Bcolmap_local.getGlobalElement(i));
2255 if (!Bview.importMatrix.is_null()) {
2256 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.origMatrix->getDomainMap()),
2257 std::runtime_error,
"Tpetra::MMM: Import setUnion messed with the DomainMap in an unfortunate way");
2259 Kokkos::resize(Icol2Ccol,Bview.importMatrix->getColMap()->getLocalNumElements());
2260 Kokkos::parallel_for(range_type(0,Bview.importMatrix->getColMap()->getLocalNumElements()),KOKKOS_LAMBDA(
const LO i) {
2261 Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
2267 lo_view_t targetMapToOrigRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"),Aview.colMap->getLocalNumElements());
2268 lo_view_t targetMapToImportRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"),Aview.colMap->getLocalNumElements());
2269 Kokkos::parallel_for(range_type(Aview.colMap->getMinLocalIndex(), Aview.colMap->getMaxLocalIndex()+1),KOKKOS_LAMBDA(
const LO i) {
2270 GO aidx = Acolmap_local.getGlobalElement(i);
2271 LO B_LID = Browmap_local.getLocalElement(aidx);
2272 if (B_LID != LO_INVALID) {
2273 targetMapToOrigRow(i) = B_LID;
2274 targetMapToImportRow(i) = LO_INVALID;
2276 LO I_LID = Irowmap_local.getLocalElement(aidx);
2277 targetMapToOrigRow(i) = LO_INVALID;
2278 targetMapToImportRow(i) = I_LID;
2285 KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Node,lo_view_t>::mult_A_B_reuse_kernel_wrapper(Aview,Bview,targetMapToOrigRow,targetMapToImportRow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
2289template<
class Scalar,
2291 class GlobalOrdinal,
2293 class LocalOrdinalViewType>
2294void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2295 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2296 const LocalOrdinalViewType & targetMapToOrigRow,
2297 const LocalOrdinalViewType & targetMapToImportRow,
2298 const LocalOrdinalViewType & Bcol2Ccol,
2299 const LocalOrdinalViewType & Icol2Ccol,
2300 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2301 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Node> > ,
2302 const std::string& label,
2303 const Teuchos::RCP<Teuchos::ParameterList>& ) {
2304#ifdef HAVE_TPETRA_MMM_TIMINGS
2305 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
2306 using Teuchos::TimeMonitor;
2307 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Reuse SerialCore"))));
2308 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
2317 typedef typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_host_type KCRS;
2318 typedef typename KCRS::StaticCrsGraphType graph_t;
2319 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
2320 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
2321 typedef typename KCRS::values_type::non_const_type scalar_view_t;
2324 typedef LocalOrdinal LO;
2325 typedef GlobalOrdinal GO;
2327 typedef Map<LO,GO,NO> map_type;
2328 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2329 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2330 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
2333 RCP<const map_type> Ccolmap = C.getColMap();
2334 size_t m = Aview.origMatrix->getLocalNumRows();
2335 size_t n = Ccolmap->getLocalNumElements();
2338 const KCRS & Amat = Aview.origMatrix->getLocalMatrixHost();
2339 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixHost();
2340 const KCRS & Cmat = C.getLocalMatrixHost();
2342 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map, Crowptr = Cmat.graph.row_map;
2343 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries, Ccolind = Cmat.graph.entries;
2344 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
2345 scalar_view_t Cvals = Cmat.values;
2347 c_lno_view_t Irowptr;
2348 lno_nnz_view_t Icolind;
2349 scalar_view_t Ivals;
2350 if(!Bview.importMatrix.is_null()) {
2351 auto lclB = Bview.importMatrix->getLocalMatrixHost();
2352 Irowptr = lclB.graph.row_map;
2353 Icolind = lclB.graph.entries;
2354 Ivals = lclB.values;
2357#ifdef HAVE_TPETRA_MMM_TIMINGS
2358 MM2 = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SerialCore - Compare"))));
2370 std::vector<size_t> c_status(n, ST_INVALID);
2373 size_t CSR_ip = 0, OLD_ip = 0;
2374 for (
size_t i = 0; i < m; i++) {
2377 OLD_ip = Crowptr[i];
2378 CSR_ip = Crowptr[i+1];
2379 for (
size_t k = OLD_ip; k < CSR_ip; k++) {
2380 c_status[Ccolind[k]] = k;
2386 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
2387 LO Aik = Acolind[k];
2388 const SC Aval = Avals[k];
2389 if (Aval == SC_ZERO)
2392 if (targetMapToOrigRow[Aik] != LO_INVALID) {
2394 size_t Bk =
static_cast<size_t> (targetMapToOrigRow[Aik]);
2396 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
2397 LO Bkj = Bcolind[j];
2398 LO Cij = Bcol2Ccol[Bkj];
2400 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
2401 std::runtime_error,
"Trying to insert a new entry (" << i <<
"," << Cij <<
") into a static graph " <<
2402 "(c_status = " << c_status[Cij] <<
" of [" << OLD_ip <<
"," << CSR_ip <<
"))");
2404 Cvals[c_status[Cij]] += Aval * Bvals[j];
2409 size_t Ik =
static_cast<size_t> (targetMapToImportRow[Aik]);
2410 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
2411 LO Ikj = Icolind[j];
2412 LO Cij = Icol2Ccol[Ikj];
2414 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
2415 std::runtime_error,
"Trying to insert a new entry (" << i <<
"," << Cij <<
") into a static graph " <<
2416 "(c_status = " << c_status[Cij] <<
" of [" << OLD_ip <<
"," << CSR_ip <<
"))");
2418 Cvals[c_status[Cij]] += Aval * Ivals[j];
2424#ifdef HAVE_TPETRA_MMM_TIMINGS
2425 auto MM3 = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Reuse ESFC"))));
2428 C.fillComplete(C.getDomainMap(), C.getRangeMap());
2434template<
class Scalar,
2436 class GlobalOrdinal,
2438void jacobi_A_B_newmatrix(
2440 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> & Dinv,
2441 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2442 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2443 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2444 const std::string& label,
2445 const Teuchos::RCP<Teuchos::ParameterList>& params)
2447 using Teuchos::Array;
2448 using Teuchos::ArrayRCP;
2449 using Teuchos::ArrayView;
2453 typedef LocalOrdinal LO;
2454 typedef GlobalOrdinal GO;
2457 typedef Import<LO,GO,NO> import_type;
2458 typedef Map<LO,GO,NO> map_type;
2459 typedef typename map_type::local_map_type local_map_type;
2463 typedef typename KCRS::StaticCrsGraphType graph_t;
2464 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
2465 typedef typename NO::execution_space execution_space;
2466 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
2467 typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
2470#ifdef HAVE_TPETRA_MMM_TIMINGS
2471 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
2472 using Teuchos::TimeMonitor;
2473 RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi M5 Cmap"))));
2475 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2478 RCP<const import_type> Cimport;
2479 RCP<const map_type> Ccolmap;
2480 RCP<const import_type> Bimport = Bview.origMatrix->getGraph()->getImporter();
2481 RCP<const import_type> Iimport = Bview.importMatrix.is_null() ?
2482 Teuchos::null : Bview.importMatrix->getGraph()->getImporter();
2483 local_map_type Acolmap_local = Aview.colMap->getLocalMap();
2484 local_map_type Browmap_local = Bview.origMatrix->getRowMap()->getLocalMap();
2485 local_map_type Irowmap_local;
if(!Bview.importMatrix.is_null()) Irowmap_local = Bview.importMatrix->getRowMap()->getLocalMap();
2486 local_map_type Bcolmap_local = Bview.origMatrix->getColMap()->getLocalMap();
2487 local_map_type Icolmap_local;
if(!Bview.importMatrix.is_null()) Icolmap_local = Bview.importMatrix->getColMap()->getLocalMap();
2494 lo_view_t Bcol2Ccol(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"),Bview.colMap->getLocalNumElements()), Icol2Ccol;
2496 if (Bview.importMatrix.is_null()) {
2499 Ccolmap = Bview.colMap;
2503 Kokkos::RangePolicy<execution_space, LO> range (0,
static_cast<LO
> (Bview.colMap->getLocalNumElements ()));
2504 Kokkos::parallel_for (range, KOKKOS_LAMBDA (
const size_t i) {
2505 Bcol2Ccol(i) =
static_cast<LO
> (i);
2516 if (!Bimport.is_null() && !Iimport.is_null()){
2517 Cimport = Bimport->setUnion(*Iimport);
2518 Ccolmap = Cimport->getTargetMap();
2520 }
else if (!Bimport.is_null() && Iimport.is_null()) {
2521 Cimport = Bimport->setUnion();
2523 }
else if(Bimport.is_null() && !Iimport.is_null()) {
2524 Cimport = Iimport->setUnion();
2527 throw std::runtime_error(
"TpetraExt::Jacobi status of matrix importers is nonsensical");
2529 Ccolmap = Cimport->getTargetMap();
2531 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.origMatrix->getDomainMap()),
2532 std::runtime_error,
"Tpetra:Jacobi Import setUnion messed with the DomainMap in an unfortunate way");
2539 Kokkos::resize(Icol2Ccol,Bview.importMatrix->getColMap()->getLocalNumElements());
2540 local_map_type Ccolmap_local = Ccolmap->getLocalMap();
2541 Kokkos::parallel_for(range_type(0,Bview.origMatrix->getColMap()->getLocalNumElements()),KOKKOS_LAMBDA(
const LO i) {
2542 Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Bcolmap_local.getGlobalElement(i));
2544 Kokkos::parallel_for(range_type(0,Bview.importMatrix->getColMap()->getLocalNumElements()),KOKKOS_LAMBDA(
const LO i) {
2545 Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
2554 C.replaceColMap(Ccolmap);
2572 lo_view_t targetMapToOrigRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"),Aview.colMap->getLocalNumElements());
2573 lo_view_t targetMapToImportRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"),Aview.colMap->getLocalNumElements());
2574 Kokkos::parallel_for(range_type(Aview.colMap->getMinLocalIndex(), Aview.colMap->getMaxLocalIndex()+1),KOKKOS_LAMBDA(
const LO i) {
2575 GO aidx = Acolmap_local.getGlobalElement(i);
2576 LO B_LID = Browmap_local.getLocalElement(aidx);
2577 if (B_LID != LO_INVALID) {
2578 targetMapToOrigRow(i) = B_LID;
2579 targetMapToImportRow(i) = LO_INVALID;
2581 LO I_LID = Irowmap_local.getLocalElement(aidx);
2582 targetMapToOrigRow(i) = LO_INVALID;
2583 targetMapToImportRow(i) = I_LID;
2590 KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Node,lo_view_t>::jacobi_A_B_newmatrix_kernel_wrapper(omega,Dinv,Aview,Bview,targetMapToOrigRow,targetMapToImportRow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
2599template<
class Scalar,
2601 class GlobalOrdinal,
2603 class LocalOrdinalViewType>
2604void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
2605 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> & Dinv,
2606 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2607 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2608 const LocalOrdinalViewType & targetMapToOrigRow,
2609 const LocalOrdinalViewType & targetMapToImportRow,
2610 const LocalOrdinalViewType & Bcol2Ccol,
2611 const LocalOrdinalViewType & Icol2Ccol,
2612 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2613 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Node> > Cimport,
2614 const std::string& label,
2615 const Teuchos::RCP<Teuchos::ParameterList>& params) {
2617#ifdef HAVE_TPETRA_MMM_TIMINGS
2618 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
2619 using Teuchos::TimeMonitor;
2620 auto MM(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Nemwmatrix SerialCore")));
2623 using Teuchos::Array;
2624 using Teuchos::ArrayRCP;
2625 using Teuchos::ArrayView;
2630 typedef typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_host_type KCRS;
2631 typedef typename KCRS::StaticCrsGraphType graph_t;
2632 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
2633 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
2634 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
2635 typedef typename KCRS::values_type::non_const_type scalar_view_t;
2638 typedef typename scalar_view_t::memory_space scalar_memory_space;
2641 typedef LocalOrdinal LO;
2642 typedef GlobalOrdinal GO;
2645 typedef Map<LO,GO,NO> map_type;
2646 size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2647 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2650 RCP<const map_type> Ccolmap = C.getColMap();
2651 size_t m = Aview.origMatrix->getLocalNumRows();
2652 size_t n = Ccolmap->getLocalNumElements();
2653 size_t b_max_nnz_per_row = Bview.origMatrix->getLocalMaxNumRowEntries();
2656 const KCRS & Amat = Aview.origMatrix->getLocalMatrixHost();
2657 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixHost();
2659 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map;
2660 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries;
2661 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
2663 c_lno_view_t Irowptr;
2664 lno_nnz_view_t Icolind;
2665 scalar_view_t Ivals;
2666 if(!Bview.importMatrix.is_null()) {
2667 auto lclB = Bview.importMatrix->getLocalMatrixHost();
2668 Irowptr = lclB.graph.row_map;
2669 Icolind = lclB.graph.entries;
2670 Ivals = lclB.values;
2671 b_max_nnz_per_row = std::max(b_max_nnz_per_row,Bview.importMatrix->getLocalMaxNumRowEntries());
2676 Dinv.template getLocalView<scalar_memory_space>(Access::ReadOnly);
2684 size_t INVALID = Teuchos::OrdinalTraits<size_t>::invalid();
2685 Array<size_t> c_status(n, ST_INVALID);
2694 size_t CSR_alloc = std::max(C_estimate_nnz(*Aview.origMatrix, *Bview.origMatrix), n);
2695 lno_view_t Crowptr(Kokkos::ViewAllocateWithoutInitializing(
"Crowptr"),m+1);
2696 lno_nnz_view_t Ccolind(Kokkos::ViewAllocateWithoutInitializing(
"Ccolind"),CSR_alloc);
2697 scalar_view_t Cvals(Kokkos::ViewAllocateWithoutInitializing(
"Cvals"),CSR_alloc);
2698 size_t CSR_ip = 0, OLD_ip = 0;
2700 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
2714 for (
size_t i = 0; i < m; i++) {
2717 Crowptr[i] = CSR_ip;
2718 SC minusOmegaDval = -omega*Dvals(i,0);
2721 for (
size_t j = Browptr[i]; j < Browptr[i+1]; j++) {
2722 Scalar Bval = Bvals[j];
2723 if (Bval == SC_ZERO)
2725 LO Bij = Bcolind[j];
2726 LO Cij = Bcol2Ccol[Bij];
2729 c_status[Cij] = CSR_ip;
2730 Ccolind[CSR_ip] = Cij;
2731 Cvals[CSR_ip] = Bvals[j];
2736 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
2737 LO Aik = Acolind[k];
2738 const SC Aval = Avals[k];
2739 if (Aval == SC_ZERO)
2742 if (targetMapToOrigRow[Aik] != LO_INVALID) {
2744 size_t Bk =
static_cast<size_t> (targetMapToOrigRow[Aik]);
2746 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
2747 LO Bkj = Bcolind[j];
2748 LO Cij = Bcol2Ccol[Bkj];
2750 if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip) {
2752 c_status[Cij] = CSR_ip;
2753 Ccolind[CSR_ip] = Cij;
2754 Cvals[CSR_ip] = minusOmegaDval* Aval * Bvals[j];
2758 Cvals[c_status[Cij]] += minusOmegaDval* Aval * Bvals[j];
2764 size_t Ik =
static_cast<size_t> (targetMapToImportRow[Aik]);
2765 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
2766 LO Ikj = Icolind[j];
2767 LO Cij = Icol2Ccol[Ikj];
2769 if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip) {
2771 c_status[Cij] = CSR_ip;
2772 Ccolind[CSR_ip] = Cij;
2773 Cvals[CSR_ip] = minusOmegaDval* Aval * Ivals[j];
2776 Cvals[c_status[Cij]] += minusOmegaDval* Aval * Ivals[j];
2783 if (i+1 < m && CSR_ip + std::min(n,(Arowptr[i+2]-Arowptr[i+1]+1)*b_max_nnz_per_row) > CSR_alloc) {
2785 Kokkos::resize(Ccolind,CSR_alloc);
2786 Kokkos::resize(Cvals,CSR_alloc);
2790 Crowptr[m] = CSR_ip;
2793 Kokkos::resize(Ccolind,CSR_ip);
2794 Kokkos::resize(Cvals,CSR_ip);
2797#ifdef HAVE_TPETRA_MMM_TIMINGS
2798 auto MM2(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix Final Sort")));
2805 C.replaceColMap(Ccolmap);
2812 if (params.is_null() || params->get(
"sort entries",
true))
2813 Import_Util::sortCrsEntries(Crowptr,Ccolind, Cvals);
2814 C.setAllValues(Crowptr,Ccolind, Cvals);
2817#ifdef HAVE_TPETRA_MMM_TIMINGS
2818 auto MM3(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix ESFC")));
2829 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
2830 labelList->set(
"Timer Label",label);
2831 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
2832 RCP<const Export<LO,GO,NO> > dummyExport;
2833 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
2841template<
class Scalar,
2843 class GlobalOrdinal,
2845void jacobi_A_B_reuse(
2847 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> & Dinv,
2848 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2849 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2850 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2851 const std::string& label,
2852 const Teuchos::RCP<Teuchos::ParameterList>& params)
2854 using Teuchos::Array;
2855 using Teuchos::ArrayRCP;
2856 using Teuchos::ArrayView;
2860 typedef LocalOrdinal LO;
2861 typedef GlobalOrdinal GO;
2864 typedef Import<LO,GO,NO> import_type;
2865 typedef Map<LO,GO,NO> map_type;
2868 typedef typename map_type::local_map_type local_map_type;
2870 typedef typename KCRS::StaticCrsGraphType graph_t;
2871 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
2872 typedef typename NO::execution_space execution_space;
2873 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
2874 typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
2876#ifdef HAVE_TPETRA_MMM_TIMINGS
2877 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
2878 using Teuchos::TimeMonitor;
2879 RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse Cmap"))));
2881 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2884 RCP<const import_type> Cimport = C.getGraph()->getImporter();
2885 RCP<const map_type> Ccolmap = C.getColMap();
2886 local_map_type Acolmap_local = Aview.colMap->getLocalMap();
2887 local_map_type Browmap_local = Bview.origMatrix->getRowMap()->getLocalMap();
2888 local_map_type Irowmap_local;
if(!Bview.importMatrix.is_null()) Irowmap_local = Bview.importMatrix->getRowMap()->getLocalMap();
2889 local_map_type Bcolmap_local = Bview.origMatrix->getColMap()->getLocalMap();
2890 local_map_type Icolmap_local;
if(!Bview.importMatrix.is_null()) Icolmap_local = Bview.importMatrix->getColMap()->getLocalMap();
2891 local_map_type Ccolmap_local = Ccolmap->getLocalMap();
2894 lo_view_t Bcol2Ccol(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"),Bview.colMap->getLocalNumElements()), Icol2Ccol;
2898 Kokkos::parallel_for(range_type(0,Bview.origMatrix->getColMap()->getLocalNumElements()),KOKKOS_LAMBDA(
const LO i) {
2899 Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Bcolmap_local.getGlobalElement(i));
2902 if (!Bview.importMatrix.is_null()) {
2903 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.origMatrix->getDomainMap()),
2904 std::runtime_error,
"Tpetra::Jacobi: Import setUnion messed with the DomainMap in an unfortunate way");
2906 Kokkos::resize(Icol2Ccol,Bview.importMatrix->getColMap()->getLocalNumElements());
2907 Kokkos::parallel_for(range_type(0,Bview.importMatrix->getColMap()->getLocalNumElements()),KOKKOS_LAMBDA(
const LO i) {
2908 Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
2914 lo_view_t targetMapToOrigRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"),Aview.colMap->getLocalNumElements());
2915 lo_view_t targetMapToImportRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"),Aview.colMap->getLocalNumElements());
2916 Kokkos::parallel_for(range_type(Aview.colMap->getMinLocalIndex(), Aview.colMap->getMaxLocalIndex()+1),KOKKOS_LAMBDA(
const LO i) {
2917 GO aidx = Acolmap_local.getGlobalElement(i);
2918 LO B_LID = Browmap_local.getLocalElement(aidx);
2919 if (B_LID != LO_INVALID) {
2920 targetMapToOrigRow(i) = B_LID;
2921 targetMapToImportRow(i) = LO_INVALID;
2923 LO I_LID = Irowmap_local.getLocalElement(aidx);
2924 targetMapToOrigRow(i) = LO_INVALID;
2925 targetMapToImportRow(i) = I_LID;
2930#ifdef HAVE_TPETRA_MMM_TIMINGS
2936 KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Node,lo_view_t>::jacobi_A_B_reuse_kernel_wrapper(omega,Dinv,Aview,Bview,targetMapToOrigRow,targetMapToImportRow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
2942template<
class Scalar,
2944 class GlobalOrdinal,
2946 class LocalOrdinalViewType>
2947void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
2948 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> & Dinv,
2949 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2950 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2951 const LocalOrdinalViewType & targetMapToOrigRow,
2952 const LocalOrdinalViewType & targetMapToImportRow,
2953 const LocalOrdinalViewType & Bcol2Ccol,
2954 const LocalOrdinalViewType & Icol2Ccol,
2955 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2956 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Node> > ,
2957 const std::string& label,
2958 const Teuchos::RCP<Teuchos::ParameterList>& ) {
2959#ifdef HAVE_TPETRA_MMM_TIMINGS
2960 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
2961 using Teuchos::TimeMonitor;
2962 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse SerialCore"))));
2963 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
2971 typedef typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_host_type KCRS;
2972 typedef typename KCRS::StaticCrsGraphType graph_t;
2973 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
2974 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
2975 typedef typename KCRS::values_type::non_const_type scalar_view_t;
2976 typedef typename scalar_view_t::memory_space scalar_memory_space;
2979 typedef LocalOrdinal LO;
2980 typedef GlobalOrdinal GO;
2982 typedef Map<LO,GO,NO> map_type;
2983 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2984 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2985 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
2988 RCP<const map_type> Ccolmap = C.getColMap();
2989 size_t m = Aview.origMatrix->getLocalNumRows();
2990 size_t n = Ccolmap->getLocalNumElements();
2993 const KCRS & Amat = Aview.origMatrix->getLocalMatrixHost();
2994 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixHost();
2995 const KCRS & Cmat = C.getLocalMatrixHost();
2997 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map, Crowptr = Cmat.graph.row_map;
2998 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries, Ccolind = Cmat.graph.entries;
2999 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
3000 scalar_view_t Cvals = Cmat.values;
3002 c_lno_view_t Irowptr;
3003 lno_nnz_view_t Icolind;
3004 scalar_view_t Ivals;
3005 if(!Bview.importMatrix.is_null()) {
3006 auto lclB = Bview.importMatrix->getLocalMatrixHost();
3007 Irowptr = lclB.graph.row_map;
3008 Icolind = lclB.graph.entries;
3009 Ivals = lclB.values;
3014 Dinv.template getLocalView<scalar_memory_space>(Access::ReadOnly);
3016#ifdef HAVE_TPETRA_MMM_TIMINGS
3017 MM2 = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse SerialCore - Compare"))));
3025 std::vector<size_t> c_status(n, ST_INVALID);
3028 size_t CSR_ip = 0, OLD_ip = 0;
3029 for (
size_t i = 0; i < m; i++) {
3033 OLD_ip = Crowptr[i];
3034 CSR_ip = Crowptr[i+1];
3035 for (
size_t k = OLD_ip; k < CSR_ip; k++) {
3036 c_status[Ccolind[k]] = k;
3042 SC minusOmegaDval = -omega*Dvals(i,0);
3045 for (
size_t j = Browptr[i]; j < Browptr[i+1]; j++) {
3046 Scalar Bval = Bvals[j];
3047 if (Bval == SC_ZERO)
3049 LO Bij = Bcolind[j];
3050 LO Cij = Bcol2Ccol[Bij];
3052 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
3053 std::runtime_error,
"Trying to insert a new entry into a static graph");
3055 Cvals[c_status[Cij]] = Bvals[j];
3059 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
3060 LO Aik = Acolind[k];
3061 const SC Aval = Avals[k];
3062 if (Aval == SC_ZERO)
3065 if (targetMapToOrigRow[Aik] != LO_INVALID) {
3067 size_t Bk =
static_cast<size_t> (targetMapToOrigRow[Aik]);
3069 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
3070 LO Bkj = Bcolind[j];
3071 LO Cij = Bcol2Ccol[Bkj];
3073 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
3074 std::runtime_error,
"Trying to insert a new entry into a static graph");
3076 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Bvals[j];
3081 size_t Ik =
static_cast<size_t> (targetMapToImportRow[Aik]);
3082 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
3083 LO Ikj = Icolind[j];
3084 LO Cij = Icol2Ccol[Ikj];
3086 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
3087 std::runtime_error,
"Trying to insert a new entry into a static graph");
3089 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Ivals[j];
3095#ifdef HAVE_TPETRA_MMM_TIMINGS
3096 MM2 = Teuchos::null;
3097 MM2 = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse ESFC"))));
3100 C.fillComplete(C.getDomainMap(), C.getRangeMap());
3101#ifdef HAVE_TPETRA_MMM_TIMINGS
3102 MM2 = Teuchos::null;
3111template<
class Scalar,
3113 class GlobalOrdinal,
3115void import_and_extract_views(
3116 const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A,
3117 Teuchos::RCP<
const Map<LocalOrdinal, GlobalOrdinal, Node> > targetMap,
3118 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
3119 Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Node> > prototypeImporter,
3120 bool userAssertsThereAreNoRemotes,
3121 const std::string& label,
3122 const Teuchos::RCP<Teuchos::ParameterList>& params)
3124 using Teuchos::Array;
3125 using Teuchos::ArrayView;
3128 using Teuchos::null;
3131 typedef LocalOrdinal LO;
3132 typedef GlobalOrdinal GO;
3135 typedef Map<LO,GO,NO> map_type;
3136 typedef Import<LO,GO,NO> import_type;
3137 typedef CrsMatrix<SC,LO,GO,NO> crs_matrix_type;
3139#ifdef HAVE_TPETRA_MMM_TIMINGS
3140 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
3141 using Teuchos::TimeMonitor;
3142 RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X Alloc"))));
3150 Aview.deleteContents();
3152 Aview.origMatrix = rcp(&A,
false);
3153 Aview.origRowMap = A.getRowMap();
3154 Aview.rowMap = targetMap;
3155 Aview.colMap = A.getColMap();
3156 Aview.domainMap = A.getDomainMap();
3157 Aview.importColMap = null;
3158 RCP<const map_type> rowMap = A.getRowMap();
3159 const int numProcs = rowMap->getComm()->getSize();
3162 if (userAssertsThereAreNoRemotes || numProcs < 2)
3165 RCP<const import_type> importer;
3166 if (params != null && params->isParameter(
"importer")) {
3167 importer = params->get<RCP<const import_type> >(
"importer");
3170#ifdef HAVE_TPETRA_MMM_TIMINGS
3172 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X RemoteMap"))));
3177 RCP<const map_type> remoteRowMap;
3178 size_t numRemote = 0;
3180 if (!prototypeImporter.is_null() &&
3181 prototypeImporter->getSourceMap()->isSameAs(*rowMap) &&
3182 prototypeImporter->getTargetMap()->isSameAs(*targetMap)) {
3184#ifdef HAVE_TPETRA_MMM_TIMINGS
3185 TimeMonitor MM2 = *TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X RemoteMap-Mode1"));
3187 ArrayView<const LO> remoteLIDs = prototypeImporter->getRemoteLIDs();
3188 numRemote = prototypeImporter->getNumRemoteIDs();
3190 Array<GO> remoteRows(numRemote);
3191 for (
size_t i = 0; i < numRemote; i++)
3192 remoteRows[i] = targetMap->getGlobalElement(remoteLIDs[i]);
3194 remoteRowMap = rcp(
new map_type(Teuchos::OrdinalTraits<global_size_t>::invalid(), remoteRows(),
3195 rowMap->getIndexBase(), rowMap->getComm()));
3198 }
else if (prototypeImporter.is_null()) {
3200#ifdef HAVE_TPETRA_MMM_TIMINGS
3201 TimeMonitor MM2 = *TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X RemoteMap-Mode2"));
3203 ArrayView<const GO> rows = targetMap->getLocalElementList();
3204 size_t numRows = targetMap->getLocalNumElements();
3206 Array<GO> remoteRows(numRows);
3207 for(
size_t i = 0; i < numRows; ++i) {
3208 const LO mlid = rowMap->getLocalElement(rows[i]);
3210 if (mlid == Teuchos::OrdinalTraits<LO>::invalid())
3211 remoteRows[numRemote++] = rows[i];
3213 remoteRows.resize(numRemote);
3214 remoteRowMap = rcp(
new map_type(Teuchos::OrdinalTraits<global_size_t>::invalid(), remoteRows(),
3215 rowMap->getIndexBase(), rowMap->getComm()));
3224 TEUCHOS_TEST_FOR_EXCEPTION(numRemote > 0, std::runtime_error,
3225 "MatrixMatrix::import_and_extract_views ERROR, numProcs < 2 but attempting to import remote matrix rows.");
3234#ifdef HAVE_TPETRA_MMM_TIMINGS
3236 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X Collective-0"))));
3240 Teuchos::reduceAll(*(rowMap->getComm()), Teuchos::REDUCE_MAX, (
global_size_t)numRemote, Teuchos::outArg(globalMaxNumRemote) );
3242 if (globalMaxNumRemote > 0) {
3243#ifdef HAVE_TPETRA_MMM_TIMINGS
3245 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X Import-2"))));
3249 importer = prototypeImporter->createRemoteOnlyImport(remoteRowMap);
3251 importer = rcp(
new import_type(rowMap, remoteRowMap));
3253 throw std::runtime_error(
"prototypeImporter->SourceMap() does not match A.getRowMap()!");
3257 params->set(
"importer", importer);
3260 if (importer != null) {
3261#ifdef HAVE_TPETRA_MMM_TIMINGS
3263 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X Import-3"))));
3267 Teuchos::ParameterList labelList;
3268 labelList.set(
"Timer Label", label);
3269 auto & labelList_subList = labelList.sublist(
"matrixmatrix: kernel params",
false);
3272 bool overrideAllreduce =
false;
3275 Teuchos::ParameterList params_sublist;
3276 if(!params.is_null()) {
3277 labelList.set(
"compute global constants", params->get(
"compute global constants",
false));
3278 params_sublist = params->sublist(
"matrixmatrix: kernel params",
false);
3279 mm_optimization_core_count = params_sublist.get(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
3280 int mm_optimization_core_count2 = params->get(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
3281 if(mm_optimization_core_count2<mm_optimization_core_count) mm_optimization_core_count=mm_optimization_core_count2;
3282 isMM = params_sublist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
3283 overrideAllreduce = params_sublist.get(
"MM_TAFC_OverrideAllreduceCheck",
false);
3285 labelList_subList.set(
"isMatrixMatrix_TransferAndFillComplete",isMM);
3286 labelList_subList.set(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
3287 labelList_subList.set(
"MM_TAFC_OverrideAllreduceCheck",overrideAllreduce);
3290 A.getDomainMap(), importer->getTargetMap(), rcpFromRef(labelList));
3296 sprintf(str,
"import_matrix.%d.dat",count);
3301#ifdef HAVE_TPETRA_MMM_STATISTICS
3302 printMultiplicationStatistics(importer, label + std::string(
" I&X MMM"));
3306#ifdef HAVE_TPETRA_MMM_TIMINGS
3308 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X Import-4"))));
3312 Aview.importColMap = Aview.importMatrix->getColMap();
3313#ifdef HAVE_TPETRA_MMM_TIMINGS
3320template<
class Scalar,
3322 class GlobalOrdinal,
3324void import_and_extract_views(
3325 const BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& M,
3326 Teuchos::RCP<
const Map<LocalOrdinal, GlobalOrdinal, Node> > targetMap,
3327 BlockCrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Mview,
3328 Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Node> > prototypeImporter,
3329 bool userAssertsThereAreNoRemotes)
3331 using Teuchos::Array;
3332 using Teuchos::ArrayView;
3335 using Teuchos::null;
3338 typedef LocalOrdinal LO;
3339 typedef GlobalOrdinal GO;
3342 typedef Map<LO,GO,NO> map_type;
3343 typedef Import<LO,GO,NO> import_type;
3344 typedef BlockCrsMatrix<SC,LO,GO,NO> blockcrs_matrix_type;
3352 Mview.deleteContents();
3355 Mview.origRowMap = M.getRowMap();
3356 Mview.rowMap = targetMap;
3357 Mview.colMap = M.getColMap();
3358 Mview.importColMap = null;
3359 RCP<const map_type> rowMap = M.getRowMap();
3360 const int numProcs = rowMap->getComm()->getSize();
3363 if (userAssertsThereAreNoRemotes || numProcs < 2)
return;
3367 RCP<const map_type> remoteRowMap;
3368 size_t numRemote = 0;
3370 if (!prototypeImporter.is_null() &&
3371 prototypeImporter->getSourceMap()->isSameAs(*rowMap) &&
3372 prototypeImporter->getTargetMap()->isSameAs(*targetMap)) {
3375 ArrayView<const LO> remoteLIDs = prototypeImporter->getRemoteLIDs();
3376 numRemote = prototypeImporter->getNumRemoteIDs();
3378 Array<GO> remoteRows(numRemote);
3379 for (
size_t i = 0; i < numRemote; i++)
3380 remoteRows[i] = targetMap->getGlobalElement(remoteLIDs[i]);
3382 remoteRowMap = rcp(
new map_type(Teuchos::OrdinalTraits<global_size_t>::invalid(), remoteRows(),
3383 rowMap->getIndexBase(), rowMap->getComm()));
3386 }
else if (prototypeImporter.is_null()) {
3389 ArrayView<const GO> rows = targetMap->getLocalElementList();
3390 size_t numRows = targetMap->getLocalNumElements();
3392 Array<GO> remoteRows(numRows);
3393 for(
size_t i = 0; i < numRows; ++i) {
3394 const LO mlid = rowMap->getLocalElement(rows[i]);
3396 if (mlid == Teuchos::OrdinalTraits<LO>::invalid())
3397 remoteRows[numRemote++] = rows[i];
3399 remoteRows.resize(numRemote);
3400 remoteRowMap = rcp(
new map_type(Teuchos::OrdinalTraits<global_size_t>::invalid(), remoteRows(),
3401 rowMap->getIndexBase(), rowMap->getComm()));
3410 TEUCHOS_TEST_FOR_EXCEPTION(numRemote > 0, std::runtime_error,
3411 "MatrixMatrix::import_and_extract_views ERROR, numProcs < 2 but attempting to import remote matrix rows.");
3419 Teuchos::reduceAll(*(rowMap->getComm()), Teuchos::REDUCE_MAX, (
global_size_t)numRemote, Teuchos::outArg(globalMaxNumRemote) );
3421 RCP<const import_type> importer;
3423 if (globalMaxNumRemote > 0) {
3426 importer = prototypeImporter->createRemoteOnlyImport(remoteRowMap);
3428 importer = rcp(
new import_type(rowMap, remoteRowMap));
3430 throw std::runtime_error(
"prototypeImporter->SourceMap() does not match M.getRowMap()!");
3433 if (importer != null) {
3439 Mview.importColMap = Mview.importMatrix->getColMap();
3445template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node,
class LocalOrdinalViewType>
3447merge_matrices(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
3448 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
3449 const LocalOrdinalViewType & Acol2Brow,
3450 const LocalOrdinalViewType & Acol2Irow,
3451 const LocalOrdinalViewType & Bcol2Ccol,
3452 const LocalOrdinalViewType & Icol2Ccol,
3453 const size_t mergedNodeNumCols) {
3457 typedef typename KCRS::StaticCrsGraphType graph_t;
3458 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
3459 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
3460 typedef typename KCRS::values_type::non_const_type scalar_view_t;
3462 const KCRS & Ak = Aview.
origMatrix->getLocalMatrixDevice();
3463 const KCRS & Bk = Bview.origMatrix->getLocalMatrixDevice();
3466 if(!Bview.importMatrix.is_null() || (Bview.importMatrix.is_null() && (&*Aview.origMatrix->getGraph()->getColMap() != &*Bview.origMatrix->getGraph()->getRowMap()))) {
3470 RCP<const KCRS> Ik_;
3471 if(!Bview.importMatrix.is_null()) Ik_ = Teuchos::rcpFromRef<const KCRS>(Bview.importMatrix->getLocalMatrixDevice());
3472 const KCRS * Ik = Bview.importMatrix.is_null() ? 0 : &*Ik_;
3474 if(Ik!=0) Iks = *Ik;
3475 size_t merge_numrows = Ak.numCols();
3477 lno_view_t Mrowptr(
"Mrowptr", merge_numrows + 1);
3479 const LocalOrdinal LO_INVALID =Teuchos::OrdinalTraits<LocalOrdinal>::invalid();
3482 typedef typename Node::execution_space execution_space;
3483 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
3484 Kokkos::parallel_scan (
"Tpetra_MatrixMatrix_merge_matrices_buildRowptr", range_type (0, merge_numrows),
3485 KOKKOS_LAMBDA(
const size_t i,
size_t& update,
const bool final) {
3486 if(
final) Mrowptr(i) = update;
3489 if(Acol2Brow(i)!=LO_INVALID)
3490 ct = Bk.graph.row_map(Acol2Brow(i)+1) - Bk.graph.row_map(Acol2Brow(i));
3492 ct = Iks.graph.row_map(Acol2Irow(i)+1) - Iks.graph.row_map(Acol2Irow(i));
3495 if(
final && i+1==merge_numrows)
3496 Mrowptr(i+1)=update;
3500 size_t merge_nnz = ::Tpetra::Details::getEntryOnHost(Mrowptr,merge_numrows);
3501 lno_nnz_view_t Mcolind(Kokkos::ViewAllocateWithoutInitializing(
"Mcolind"),merge_nnz);
3502 scalar_view_t Mvalues(Kokkos::ViewAllocateWithoutInitializing(
"Mvals"),merge_nnz);
3505 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
3506 Kokkos::parallel_for (
"Tpetra_MatrixMatrix_merg_matrices_buildColindValues", range_type (0, merge_numrows),KOKKOS_LAMBDA(
const size_t i) {
3507 if(Acol2Brow(i)!=LO_INVALID) {
3508 size_t row = Acol2Brow(i);
3509 size_t start = Bk.graph.row_map(row);
3510 for(
size_t j= Mrowptr(i); j<Mrowptr(i+1); j++) {
3511 Mvalues(j) = Bk.values(j-Mrowptr(i)+start);
3512 Mcolind(j) = Bcol2Ccol(Bk.graph.entries(j-Mrowptr(i)+start));
3516 size_t row = Acol2Irow(i);
3517 size_t start = Iks.graph.row_map(row);
3518 for(
size_t j= Mrowptr(i); j<Mrowptr(i+1); j++) {
3519 Mvalues(j) = Iks.values(j-Mrowptr(i)+start);
3520 Mcolind(j) = Icol2Ccol(Iks.graph.entries(j-Mrowptr(i)+start));
3525 KCRS newmat(
"CrsMatrix",merge_numrows,mergedNodeNumCols,merge_nnz,Mvalues,Mrowptr,Mcolind);
3536template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node,
class LocalOrdinalViewType>
3537const typename Tpetra::BlockCrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_device_type
3538merge_matrices(BlockCrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
3539 BlockCrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
3540 const LocalOrdinalViewType & Acol2Brow,
3541 const LocalOrdinalViewType & Acol2Irow,
3542 const LocalOrdinalViewType & Bcol2Ccol,
3543 const LocalOrdinalViewType & Icol2Ccol,
3544 const size_t mergedNodeNumCols)
3547 typedef typename Tpetra::BlockCrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_device_type KBCRS;
3548 typedef typename KBCRS::StaticCrsGraphType graph_t;
3549 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
3550 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
3551 typedef typename KBCRS::values_type::non_const_type scalar_view_t;
3554 const KBCRS & Ak = Aview.
origMatrix->getLocalMatrixDevice();
3555 const KBCRS & Bk = Bview.origMatrix->getLocalMatrixDevice();
3558 if(!Bview.importMatrix.is_null() ||
3559 (Bview.importMatrix.is_null() &&
3560 (&*Aview.origMatrix->getGraph()->getColMap() != &*Bview.origMatrix->getGraph()->getRowMap()))) {
3565 RCP<const KBCRS> Ik_;
3566 if(!Bview.importMatrix.is_null()) Ik_ = Teuchos::rcpFromRef<const KBCRS>(Bview.importMatrix->getLocalMatrixDevice());
3567 const KBCRS * Ik = Bview.importMatrix.is_null() ? 0 : &*Ik_;
3569 if(Ik!=0) Iks = *Ik;
3570 size_t merge_numrows = Ak.numCols();
3573 lno_view_t Mrowptr(
"Mrowptr", merge_numrows + 1);
3575 const LocalOrdinal LO_INVALID =Teuchos::OrdinalTraits<LocalOrdinal>::invalid();
3578 typedef typename Node::execution_space execution_space;
3579 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
3580 Kokkos::parallel_scan (
"Tpetra_MatrixMatrix_merge_matrices_buildRowptr", range_type (0, merge_numrows),
3581 KOKKOS_LAMBDA(
const size_t i,
size_t& update,
const bool final) {
3582 if(
final) Mrowptr(i) = update;
3585 if(Acol2Brow(i)!=LO_INVALID)
3586 ct = Bk.graph.row_map(Acol2Brow(i)+1) - Bk.graph.row_map(Acol2Brow(i));
3588 ct = Iks.graph.row_map(Acol2Irow(i)+1) - Iks.graph.row_map(Acol2Irow(i));
3591 if(
final && i+1==merge_numrows)
3592 Mrowptr(i+1)=update;
3596 size_t merge_nnz = ::Tpetra::Details::getEntryOnHost(Mrowptr,merge_numrows);
3597 const int blocksize = Ak.blockDim();
3598 lno_nnz_view_t Mcolind(Kokkos::ViewAllocateWithoutInitializing(
"Mcolind"),merge_nnz);
3599 scalar_view_t Mvalues(Kokkos::ViewAllocateWithoutInitializing(
"Mvals"),merge_nnz*blocksize*blocksize);
3602 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
3603 Kokkos::parallel_for (
"Tpetra_MatrixMatrix_merg_matrices_buildColindValues", range_type (0, merge_numrows),KOKKOS_LAMBDA(
const size_t i) {
3604 if(Acol2Brow(i)!=LO_INVALID) {
3605 size_t row = Acol2Brow(i);
3606 size_t start = Bk.graph.row_map(row);
3607 for(
size_t j= Mrowptr(i); j<Mrowptr(i+1); j++) {
3608 Mcolind(j) = Bcol2Ccol(Bk.graph.entries(j-Mrowptr(i)+start));
3610 for (
int b=0; b<blocksize*blocksize; ++b) {
3611 const int val_indx = j*blocksize*blocksize + b;
3612 const int b_val_indx = (j-Mrowptr(i)+start)*blocksize*blocksize + b;
3613 Mvalues(val_indx) = Bk.values(b_val_indx);
3618 size_t row = Acol2Irow(i);
3619 size_t start = Iks.graph.row_map(row);
3620 for(
size_t j= Mrowptr(i); j<Mrowptr(i+1); j++) {
3621 Mcolind(j) = Icol2Ccol(Iks.graph.entries(j-Mrowptr(i)+start));
3623 for (
int b=0; b<blocksize*blocksize; ++b) {
3624 const int val_indx = j*blocksize*blocksize + b;
3625 const int b_val_indx = (j-Mrowptr(i)+start)*blocksize*blocksize + b;
3626 Mvalues(val_indx) = Iks.values(b_val_indx);
3633 KBCRS newmat(
"CrsMatrix",merge_numrows,mergedNodeNumCols,merge_nnz,Mvalues,Mrowptr,Mcolind, blocksize);
3643template<
typename SC,
typename LO,
typename GO,
typename NO>
3644void AddKernels<SC, LO, GO, NO>::
3646 const typename MMdetails::AddKernels<SC, LO, GO, NO>::values_array& Avals,
3647 const typename MMdetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array_const& Arowptrs,
3648 const typename MMdetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Acolinds,
3649 const typename MMdetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarA,
3650 const typename MMdetails::AddKernels<SC, LO, GO, NO>::values_array& Bvals,
3651 const typename MMdetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array_const& Browptrs,
3652 const typename MMdetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Bcolinds,
3653 const typename MMdetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarB,
3654 typename MMdetails::AddKernels<SC, LO, GO, NO>::values_array& Cvals,
3655 typename MMdetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array& Crowptrs,
3656 typename MMdetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Ccolinds)
3658 using Teuchos::TimeMonitor;
3659 using AddKern = MMdetails::AddKernels<SC, LO, GO, NO>;
3660 TEUCHOS_TEST_FOR_EXCEPTION(Arowptrs.extent(0) != Browptrs.extent(0), std::runtime_error,
"Can't add matrices with different numbers of rows.");
3661 auto nrows = Arowptrs.extent(0) - 1;
3662 Crowptrs = row_ptrs_array(Kokkos::ViewAllocateWithoutInitializing(
"C row ptrs"), nrows + 1);
3663 typename AddKern::KKH handle;
3664 handle.create_spadd_handle(
true);
3665 auto addHandle = handle.get_spadd_handle();
3666#ifdef HAVE_TPETRA_MMM_TIMINGS
3667 auto MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"TpetraExt::MatrixMatrix::add() sorted symbolic")));
3669 KokkosSparse::Experimental::spadd_symbolic
3670 (&handle, Arowptrs, Acolinds, Browptrs, Bcolinds, Crowptrs);
3672 Cvals = values_array(
"C values", addHandle->get_c_nnz());
3673 Ccolinds = col_inds_array(Kokkos::ViewAllocateWithoutInitializing(
"C colinds"), addHandle->get_c_nnz());
3674#ifdef HAVE_TPETRA_MMM_TIMINGS
3676 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"TpetraExt::MatrixMatrix::add() sorted numeric")));
3678 KokkosSparse::Experimental::spadd_numeric(&handle,
3679 Arowptrs, Acolinds, Avals, scalarA,
3680 Browptrs, Bcolinds, Bvals, scalarB,
3681 Crowptrs, Ccolinds, Cvals);
3684template<
typename SC,
typename LO,
typename GO,
typename NO>
3685void AddKernels<SC, LO, GO, NO>::
3687 const typename MMdetails::AddKernels<SC, LO, GO, NO>::values_array& Avals,
3688 const typename MMdetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array_const& Arowptrs,
3689 const typename MMdetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Acolinds,
3690 const typename MMdetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarA,
3691 const typename MMdetails::AddKernels<SC, LO, GO, NO>::values_array& Bvals,
3692 const typename MMdetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array_const& Browptrs,
3693 const typename MMdetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Bcolinds,
3694 const typename MMdetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarB,
3696 typename MMdetails::AddKernels<SC, LO, GO, NO>::values_array& Cvals,
3697 typename MMdetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array& Crowptrs,
3698 typename MMdetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Ccolinds)
3700 using Teuchos::TimeMonitor;
3701 using AddKern = MMdetails::AddKernels<SC, LO, GO, NO>;
3702 TEUCHOS_TEST_FOR_EXCEPTION(Arowptrs.extent(0) != Browptrs.extent(0), std::runtime_error,
"Can't add matrices with different numbers of rows.");
3703 auto nrows = Arowptrs.extent(0) - 1;
3704 Crowptrs = row_ptrs_array(Kokkos::ViewAllocateWithoutInitializing(
"C row ptrs"), nrows + 1);
3705 typedef MMdetails::AddKernels<SC, LO, GO, NO> AddKern;
3706 typename AddKern::KKH handle;
3707 handle.create_spadd_handle(
false);
3708 auto addHandle = handle.get_spadd_handle();
3709#ifdef HAVE_TPETRA_MMM_TIMINGS
3710 auto MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"TpetraExt::MatrixMatrix::add() unsorted symbolic")));
3712 KokkosSparse::Experimental::spadd_symbolic
3713 (&handle, Arowptrs, Acolinds, Browptrs, Bcolinds, Crowptrs);
3715 Cvals = values_array(
"C values", addHandle->get_c_nnz());
3716 Ccolinds = col_inds_array(Kokkos::ViewAllocateWithoutInitializing(
"C colinds"), addHandle->get_c_nnz());
3717#ifdef HAVE_TPETRA_MMM_TIMINGS
3719 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"TpetraExt::MatrixMatrix::add() unsorted kernel: unsorted numeric")));
3721 KokkosSparse::Experimental::spadd_numeric(&handle,
3722 Arowptrs, Acolinds, Avals, scalarA,
3723 Browptrs, Bcolinds, Bvals, scalarB,
3724 Crowptrs, Ccolinds, Cvals);
3727template<
typename GO,
3728 typename LocalIndicesType,
3729 typename GlobalIndicesType,
3730 typename ColMapType>
3731struct ConvertLocalToGlobalFunctor
3733 ConvertLocalToGlobalFunctor(
3734 const LocalIndicesType& colindsOrig_,
3735 const GlobalIndicesType& colindsConverted_,
3736 const ColMapType& colmap_) :
3737 colindsOrig (colindsOrig_),
3738 colindsConverted (colindsConverted_),
3741 KOKKOS_INLINE_FUNCTION
void
3742 operator() (
const GO i)
const
3744 colindsConverted(i) = colmap.getGlobalElement(colindsOrig(i));
3746 LocalIndicesType colindsOrig;
3747 GlobalIndicesType colindsConverted;
3751template<
typename SC,
typename LO,
typename GO,
typename NO>
3752void MMdetails::AddKernels<SC, LO, GO, NO>::
3753convertToGlobalAndAdd(
3754 const typename MMdetails::AddKernels<SC, LO, GO, NO>::KCRS& A,
3755 const typename MMdetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarA,
3756 const typename MMdetails::AddKernels<SC, LO, GO, NO>::KCRS& B,
3757 const typename MMdetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarB,
3758 const typename MMdetails::AddKernels<SC, LO, GO, NO>::local_map_type& AcolMap,
3759 const typename MMdetails::AddKernels<SC, LO, GO, NO>::local_map_type& BcolMap,
3760 typename MMdetails::AddKernels<SC, LO, GO, NO>::values_array& Cvals,
3761 typename MMdetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array& Crowptrs,
3762 typename MMdetails::AddKernels<SC, LO, GO, NO>::global_col_inds_array& Ccolinds)
3764 using Teuchos::TimeMonitor;
3767 using KKH_GO = KokkosKernels::Experimental::KokkosKernelsHandle<size_t, GO, impl_scalar_type,
3768 typename NO::execution_space,
typename NO::memory_space,
typename NO::memory_space>;
3770 const values_array Avals = A.values;
3771 const values_array Bvals = B.values;
3772 const col_inds_array Acolinds = A.graph.entries;
3773 const col_inds_array Bcolinds = B.graph.entries;
3774 auto Arowptrs = A.graph.row_map;
3775 auto Browptrs = B.graph.row_map;
3776 global_col_inds_array AcolindsConverted(Kokkos::ViewAllocateWithoutInitializing(
"A colinds (converted)"), Acolinds.extent(0));
3777 global_col_inds_array BcolindsConverted(Kokkos::ViewAllocateWithoutInitializing(
"B colinds (converted)"), Bcolinds.extent(0));
3778#ifdef HAVE_TPETRA_MMM_TIMINGS
3779 auto MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"TpetraExt::MatrixMatrix::add() diff col map kernel: " + std::string(
"column map conversion"))));
3781 ConvertLocalToGlobalFunctor<GO, col_inds_array, global_col_inds_array, local_map_type> convertA(Acolinds, AcolindsConverted, AcolMap);
3782 Kokkos::parallel_for(
"Tpetra_MatrixMatrix_convertColIndsA", range_type(0, Acolinds.extent(0)), convertA);
3783 ConvertLocalToGlobalFunctor<GO, col_inds_array, global_col_inds_array, local_map_type> convertB(Bcolinds, BcolindsConverted, BcolMap);
3784 Kokkos::parallel_for(
"Tpetra_MatrixMatrix_convertColIndsB", range_type(0, Bcolinds.extent(0)), convertB);
3786 handle.create_spadd_handle(
false);
3787 auto addHandle = handle.get_spadd_handle();
3788#ifdef HAVE_TPETRA_MMM_TIMINGS
3790 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"TpetraExt::MatrixMatrix::add() diff col map kernel: unsorted symbolic")));
3792 auto nrows = Arowptrs.extent(0) - 1;
3793 Crowptrs = row_ptrs_array(Kokkos::ViewAllocateWithoutInitializing(
"C row ptrs"), nrows + 1);
3794 KokkosSparse::Experimental::spadd_symbolic
3795 (&handle, Arowptrs, AcolindsConverted, Browptrs, BcolindsConverted, Crowptrs);
3796 Cvals = values_array(
"C values", addHandle->get_c_nnz());
3797 Ccolinds = global_col_inds_array(Kokkos::ViewAllocateWithoutInitializing(
"C colinds"), addHandle->get_c_nnz());
3798#ifdef HAVE_TPETRA_MMM_TIMINGS
3800 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"TpetraExt::MatrixMatrix::add() diff col map kernel: unsorted numeric")));
3802 KokkosSparse::Experimental::spadd_numeric(&handle,
3803 Arowptrs, AcolindsConverted, Avals, scalarA,
3804 Browptrs, BcolindsConverted, Bvals, scalarB,
3805 Crowptrs, Ccolinds, Cvals);
3821#define TPETRA_MATRIXMATRIX_INSTANT(SCALAR,LO,GO,NODE) \
3823 void MatrixMatrix::Multiply( \
3824 const CrsMatrix< SCALAR , LO , GO , NODE >& A, \
3826 const CrsMatrix< SCALAR , LO , GO , NODE >& B, \
3828 CrsMatrix< SCALAR , LO , GO , NODE >& C, \
3829 bool call_FillComplete_on_result, \
3830 const std::string & label, \
3831 const Teuchos::RCP<Teuchos::ParameterList>& params); \
3834 void MatrixMatrix::Multiply( \
3835 const Teuchos::RCP<const BlockCrsMatrix< SCALAR , LO , GO , NODE > >& A, \
3837 const Teuchos::RCP<const BlockCrsMatrix< SCALAR , LO , GO , NODE > >& B, \
3839 Teuchos::RCP<BlockCrsMatrix< SCALAR , LO , GO , NODE > >& C, \
3840 const std::string & label); \
3843 void MatrixMatrix::Jacobi( \
3845 const Vector< SCALAR, LO, GO, NODE > & Dinv, \
3846 const CrsMatrix< SCALAR , LO , GO , NODE >& A, \
3847 const CrsMatrix< SCALAR , LO , GO , NODE >& B, \
3848 CrsMatrix< SCALAR , LO , GO , NODE >& C, \
3849 bool call_FillComplete_on_result, \
3850 const std::string & label, \
3851 const Teuchos::RCP<Teuchos::ParameterList>& params); \
3854 void MatrixMatrix::Add( \
3855 const CrsMatrix< SCALAR , LO , GO , NODE >& A, \
3858 const CrsMatrix< SCALAR , LO , GO , NODE >& B, \
3861 Teuchos::RCP<CrsMatrix< SCALAR , LO , GO , NODE > > C); \
3864 void MatrixMatrix::Add( \
3865 const CrsMatrix<SCALAR, LO, GO, NODE>& A, \
3868 CrsMatrix<SCALAR, LO, GO, NODE>& B, \
3872 Teuchos::RCP<CrsMatrix< SCALAR , LO , GO , NODE > > \
3873 MatrixMatrix::add<SCALAR, LO, GO, NODE> \
3874 (const SCALAR & alpha, \
3875 const bool transposeA, \
3876 const CrsMatrix<SCALAR, LO, GO, NODE>& A, \
3877 const SCALAR & beta, \
3878 const bool transposeB, \
3879 const CrsMatrix<SCALAR, LO, GO, NODE>& B, \
3880 const Teuchos::RCP<const Map<LO, GO, NODE> >& domainMap, \
3881 const Teuchos::RCP<const Map<LO, GO, NODE> >& rangeMap, \
3882 const Teuchos::RCP<Teuchos::ParameterList>& params); \
3886 MatrixMatrix::add< SCALAR , LO, GO , NODE > \
3887 (const SCALAR & alpha, \
3888 const bool transposeA, \
3889 const CrsMatrix< SCALAR , LO, GO , NODE >& A, \
3890 const SCALAR& beta, \
3891 const bool transposeB, \
3892 const CrsMatrix< SCALAR , LO, GO , NODE >& B, \
3893 CrsMatrix< SCALAR , LO, GO , NODE >& C, \
3894 const Teuchos::RCP<const Map<LO, GO , NODE > >& domainMap, \
3895 const Teuchos::RCP<const Map<LO, GO , NODE > >& rangeMap, \
3896 const Teuchos::RCP<Teuchos::ParameterList>& params); \
3898 template struct MMdetails::AddKernels<SCALAR, LO, GO, NODE>; \
3900 template void MMdetails::import_and_extract_views<SCALAR, LO, GO, NODE>(const CrsMatrix<SCALAR, LO, GO, NODE>& M, \
3901 Teuchos::RCP<const Map<LO, GO, NODE> > targetMap, \
3902 CrsMatrixStruct<SCALAR, LO, GO, NODE>& Mview, \
3903 Teuchos::RCP<const Import<LO,GO, NODE> > prototypeImporter, \
3904 bool userAssertsThereAreNoRemotes, \
3905 const std::string& label, \
3906 const Teuchos::RCP<Teuchos::ParameterList>& params); \
3908 template void MMdetails::import_and_extract_views<SCALAR, LO, GO, NODE>(const BlockCrsMatrix<SCALAR, LO, GO, NODE>& M, \
3909 Teuchos::RCP<const Map<LO, GO, NODE> > targetMap, \
3910 BlockCrsMatrixStruct<SCALAR, LO, GO, NODE>& Mview, \
3911 Teuchos::RCP<const Import<LO,GO, NODE> > prototypeImporter, \
3912 bool userAssertsThereAreNoRemotes);
Matrix Market file readers and writers for Tpetra objects.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
Declaration and definition of Tpetra::Details::getEntryOnHost.
Utility functions for packing and unpacking sparse matrix entries.
Internal functions and macros designed for use with Tpetra::Import and Tpetra::Export objects.
Stand-alone utility functions and macros.
Struct that holds views of the contents of a CrsMatrix.
Teuchos::RCP< const map_type > colMap
Col map for the original version of the matrix.
Teuchos::RCP< const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > origMatrix
The original matrix.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
static int TAFC_OptimizationCoreCount()
MPI process count above which Tpetra::CrsMatrix::transferAndFillComplete will attempt to do advanced ...
static void writeSparseFile(const std::string &filename, const sparse_matrix_type &matrix, const std::string &matrixName, const std::string &matrixDescription, const bool debug=false)
Print the sparse matrix in Matrix Market format, with comments.
Teuchos::RCP< CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > add(const Scalar &alpha, const bool transposeA, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Scalar &beta, const bool transposeB, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &domainMap=Teuchos::null, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &rangeMap=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Compute the sparse matrix sum C = scalarA * Op(A) + scalarB * Op(B), where Op(X) is either X or its t...
void Multiply(const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, bool transposeA, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, bool transposeB, CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &C, bool call_FillComplete_on_result=true, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Sparse matrix-matrix multiply.
void Jacobi(Scalar omega, const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Dinv, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &C, bool call_FillComplete_on_result=true, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
void Add(const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, bool transposeA, Scalar scalarA, CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, Scalar scalarB)
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void removeCrsMatrixZeros(CrsMatrixType &matrix, typename Teuchos::ScalarTraits< typename CrsMatrixType::scalar_type >::magnitudeType const &threshold=Teuchos::ScalarTraits< typename CrsMatrixType::scalar_type >::magnitude(Teuchos::ScalarTraits< typename CrsMatrixType::scalar_type >::zero()))
Remove zero entries from a matrix.
size_t global_size_t
Global size_t object.