2236 (
static_cast<size_t> (indices.size ()) <
theNumEntries,std::runtime_error,
2237 "Specified storage (size==" << indices.size () <<
") does not suffice "
2238 "to hold all " <<
theNumEntries <<
" entry/ies for this row.");
2241 if (
rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
2242 if (isLocallyIndexed ()) {
2248 else if (isGloballyIndexed ()) {
2249 auto gblInds = getGlobalIndsViewHost(rowinfo);
2250 for (
size_t j = 0; j < theNumEntries; ++j) {
2251 indices[j] = colMap_->getLocalElement (gblInds(j));
2258 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2262 nonconst_global_inds_host_view_type &indices,
2263 size_t& numEntries)
const
2265 using Teuchos::ArrayView;
2273 static_cast<size_t> (indices.size ()) <
theNumEntries, std::runtime_error,
2274 "Specified storage (size==" << indices.size () <<
") does not suffice "
2275 "to hold all " <<
theNumEntries <<
" entry/ies for this row.");
2278 if (
rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
2279 if (isLocallyIndexed ()) {
2282 indices[
j] = colMap_->getGlobalElement (
lclInds(
j));
2285 else if (isGloballyIndexed ()) {
2295 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2300 local_inds_host_view_type &indices)
const
2305 (isGloballyIndexed (), std::runtime_error,
"The graph's indices are "
2306 "currently stored as global indices, so we cannot return a view with "
2307 "local column indices, whether or not the graph has a column Map. If "
2308 "the graph _does_ have a column Map, use getLocalRowCopy() instead.");
2311 if (
rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
2313 indices = lclIndsUnpacked_wdv.getHostSubview(
rowInfo.offset1D,
2320 indices = local_inds_host_view_type();
2325 (
static_cast<size_t> (indices.size ()) !=
2326 getNumEntriesInLocalRow (localRow), std::logic_error,
"indices.size() "
2327 "= " << indices.extent(0) <<
" != getNumEntriesInLocalRow(localRow=" <<
2328 localRow <<
") = " << getNumEntriesInLocalRow(localRow) <<
2329 ". Please report this bug to the Tpetra developers.");
2334 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2339 global_inds_host_view_type &indices)
const
2344 (isLocallyIndexed (), std::runtime_error,
"The graph's indices are "
2345 "currently stored as local indices, so we cannot return a view with "
2346 "global column indices. Use getGlobalRowCopy() instead.");
2351 if (
rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
2353 indices = gblInds_wdv.getHostSubview(
rowInfo.offset1D,
2358 indices =
typename global_inds_dualv_type::t_host::const_type();
2362 (
static_cast<size_t> (indices.size ()) !=
2364 std::logic_error,
"indices.size() = " << indices.extent(0)
2365 <<
" != getNumEntriesInGlobalRow(globalRow=" <<
globalRow <<
") = "
2367 <<
". Please report this bug to the Tpetra developers.");
2372 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2376 const Teuchos::ArrayView<const LocalOrdinal>& indices)
2381 (! isFillActive (), std::runtime_error,
"Fill must be active.");
2383 (isGloballyIndexed (), std::runtime_error,
2384 "Graph indices are global; use insertGlobalIndices().");
2386 (! hasColMap (), std::runtime_error,
2387 "Cannot insert local indices without a column Map.");
2389 (! rowMap_->isNodeLocalElement (localRow), std::runtime_error,
2390 "Local row index " << localRow <<
" is not in the row Map "
2391 "on the calling process.");
2392 if (! indicesAreAllocated ()) {
2393 allocateIndices (LocalIndices, verbose_);
2402 using Teuchos::Array;
2403 using Teuchos::toString;
2405 typedef typename Teuchos::ArrayView<const LocalOrdinal>::size_type size_type;
2410 for (size_type
k = 0;
k < indices.size (); ++
k) {
2411 if (!
colMap.isNodeLocalElement (indices[
k])) {
2417 std::ostringstream
os;
2418 os <<
"Tpetra::CrsGraph::insertLocalIndices: You attempted to insert "
2419 "entries in owned row " << localRow <<
", at the following column "
2420 "indices: " << toString (indices) <<
"." <<
endl;
2421 os <<
"Of those, the following indices are not in the column Map on "
2422 "this process: " << toString (
badColInds) <<
"." <<
endl <<
"Since "
2423 "the graph has a column Map already, it is invalid to insert entries "
2424 "at those locations.";
2430 insertLocalIndicesImpl (localRow, indices);
2434 (! indicesAreAllocated () || ! isLocallyIndexed (), std::logic_error,
2435 "At the end of insertLocalIndices, ! indicesAreAllocated() || "
2436 "! isLocallyIndexed() is true. Please report this bug to the "
2437 "Tpetra developers.");
2441 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2449 this->insertLocalIndices (localRow,
indsT);
2453 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2464 (this->isLocallyIndexed (), std::runtime_error,
2465 "graph indices are local; use insertLocalIndices().");
2471 (! this->isFillActive (), std::runtime_error,
2472 "You are not allowed to call this method if fill is not active. "
2473 "If fillComplete has been called, you must first call resumeFill "
2474 "before you may insert indices.");
2475 if (! indicesAreAllocated ()) {
2476 allocateIndices (GlobalIndices, verbose_);
2478 const LO
lclRow = this->rowMap_->getLocalElement (
gblRow);
2479 if (
lclRow != Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
2481 if (this->hasColMap ()) {
2497 std::ostringstream
os;
2498 os <<
"You attempted to insert entries in owned row " <<
gblRow
2499 <<
", at the following column indices: [";
2506 os <<
"]." <<
endl <<
"Of those, the following indices are not in "
2507 "the column Map on this process: [";
2514 os <<
"]." <<
endl <<
"Since the matrix has a column Map already, "
2515 "it is invalid to insert entries at those locations.";
2517 (
true, std::invalid_argument,
os.str ());
2530 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2541 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2550 const char tfecfFuncName[] =
"insertGlobalIndicesFiltered: ";
2553 (this->isLocallyIndexed (), std::runtime_error,
2554 "Graph indices are local; use insertLocalIndices().");
2560 (! this->isFillActive (), std::runtime_error,
2561 "You are not allowed to call this method if fill is not active. "
2562 "If fillComplete has been called, you must first call resumeFill "
2563 "before you may insert indices.");
2564 if (! indicesAreAllocated ()) {
2565 allocateIndices (GlobalIndices, verbose_);
2570 if (! colMap_.is_null ()) {
2581 if (
lclCol == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
2605 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2624 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2631 ! isFillActive (), std::runtime_error,
"requires that fill is active.");
2633 isStorageOptimized (), std::runtime_error,
2634 "cannot remove indices after optimizeStorage() has been called.");
2636 isGloballyIndexed (), std::runtime_error,
"graph indices are global.");
2638 ! rowMap_->isNodeLocalElement (
lrow), std::runtime_error,
2639 "Local row " <<
lrow <<
" is not in the row Map on the calling process.");
2640 if (! indicesAreAllocated ()) {
2641 allocateIndices (LocalIndices, verbose_);
2644 if (k_numRowEntries_.extent (0) != 0) {
2645 this->k_numRowEntries_(
lrow) = 0;
2650 (getNumEntriesInLocalRow (
lrow) != 0 ||
2651 ! indicesAreAllocated () ||
2652 ! isLocallyIndexed (), std::logic_error,
2653 "Violated stated post-conditions. Please contact Tpetra team.");
2658 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2662 const typename local_graph_device_type::entries_type::non_const_type&
columnIndices)
2665 ProfilingRegion
region (
"Tpetra::CrsGraph::setAllIndices");
2668 ! hasColMap () || getColMap ().
is_null (), std::runtime_error,
2669 "The graph must have a column Map before you may call this method.");
2676 std::runtime_error,
"Have 0 local rows, but rowPointers.size() is neither 0 nor 1.");
2681 std::runtime_error,
"rowPointers.size() = " <<
rowPtrLen <<
2682 " != this->getLocalNumRows()+1 = " << (
numLocalRows + 1) <<
".");
2687 using exec_space =
typename local_graph_device_type::execution_space;
2690 Kokkos::parallel_reduce(Kokkos::RangePolicy<exec_space>(0,
columnIndices.extent(0)),
2697 auto comm = this->getComm();
2706 message = std::string(
"ERROR, rank ") + std::to_string(comm->getRank()) +
", CrsGraph::setAllIndices(): provided columnIndices are not all within range [0, getLocalNumCols())!\n";
2709 throw std::invalid_argument(
"CrsGraph::setAllIndices(): columnIndices are out of the valid range on at least one process.");
2713 if (debug_ && this->isSorted()) {
2716 using exec_space =
typename local_graph_device_type::execution_space;
2717 using size_type =
typename local_graph_device_type::size_type;
2718 Kokkos::parallel_reduce(Kokkos::RangePolicy<exec_space>(0,
numLocalRows),
2733 auto comm = this->getComm();
2734 Teuchos::reduceAll<int, int> (*comm, Teuchos::REDUCE_MAX,
notSorted,
2742 message = std::string(
"ERROR, rank ") + std::to_string(comm->getRank()) +
", CrsGraph::setAllIndices(): provided columnIndices are not sorted!\n";
2745 throw std::invalid_argument(
"CrsGraph::setAllIndices(): provided columnIndices are not sorted within rows on at least one process.");
2749 indicesAreAllocated_ =
true;
2750 indicesAreLocal_ =
true;
2751 indicesAreSorted_ =
true;
2752 noRedundancies_ =
true;
2754 lclIndsUnpacked_wdv = lclIndsPacked_wdv;
2757 set_need_sync_host_uvm_access();
2761 storageStatus_ = Details::STORAGE_1D_PACKED;
2766 numAllocForAllRows_ = 0;
2767 k_numAllocPerRow_ =
decltype (k_numAllocPerRow_) ();
2769 checkInternalState ();
2773 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2780 typedef typename local_graph_device_type::row_map_type
row_map_type;
2781 typedef typename row_map_type::array_layout
layout_type;
2784 Kokkos::MemoryUnmanaged> input_view_type;
2788 constexpr bool same = std::is_same<size_t, row_offset_type>::value;
2794#ifdef KOKKOS_ENABLE_CXX17
2795 if constexpr (
same) {
2809 Kokkos::Impl::if_c<
same,
2818 std::is_same<
typename row_map_type::memory_space,
2819 Kokkos::HostSpace>::value;
2842 Kokkos::View<LocalOrdinal*, layout_type, device_type>
k_ind =
2843 Kokkos::Compat::getKokkosViewDeepCopy<device_type> (
columnIndices ());
2848 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2853 using Teuchos::Comm;
2854 using Teuchos::outArg;
2857 using Teuchos::REDUCE_MAX;
2858 using Teuchos::REDUCE_MIN;
2859 using Teuchos::reduceAll;
2864 using size_type =
typename Teuchos::Array<GO>::size_type;
2867 std::unique_ptr<std::string>
prefix;
2869 prefix = this->createPrefix(
"CrsGraph",
"globalAssemble");
2870 std::ostringstream
os;
2872 std::cerr <<
os.str();
2877 (! isFillActive (), std::runtime_error,
"Fill must be active before "
2878 "you may call this method.");
2893 std::ostringstream
os;
2895 std::cerr <<
os.str();
2899 else if (verbose_) {
2900 std::ostringstream
os;
2901 os << *
prefix <<
"At least 1 process has nonlocal rows"
2903 std::cerr <<
os.str();
2922 for (
auto mapIter = this->nonlocals_.begin ();
2923 mapIter != this->nonlocals_.end ();
2953 const global_size_t INV = Teuchos::OrdinalTraits<global_size_t>::invalid ();
2958 std::ostringstream
os;
2959 os << *
prefix <<
"nonlocalRowMap->getIndexBase()="
2961 std::cerr <<
os.str();
2973 for (
auto mapIter = this->nonlocals_.begin ();
2974 mapIter != this->nonlocals_.end ();
2983 std::ostringstream
os;
2985 std::cerr <<
os.str();
3001 std::ostringstream
os;
3003 std::cerr <<
os.str();
3011 std::ostringstream
os;
3012 os << *
prefix <<
"Original row Map is NOT 1-to-1" <<
endl;
3013 std::cerr <<
os.str();
3030 std::ostringstream
os;
3032 std::cerr <<
os.str();
3043 std::ostringstream
os;
3045 std::cerr <<
os.str();
3057 checkInternalState ();
3059 std::ostringstream
os;
3061 std::cerr <<
os.str();
3066 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3071 clearGlobalConstants();
3072 if (
params != Teuchos::null) this->setParameterList (
params);
3074 indicesAreSorted_ =
true;
3075 noRedundancies_ =
true;
3076 fillComplete_ =
false;
3080 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3095 Teuchos::RCP<const map_type>
domMap = this->getDomainMap ();
3097 domMap = this->getRowMap ();
3099 Teuchos::RCP<const map_type>
ranMap = this->getRangeMap ();
3101 ranMap = this->getRowMap ();
3107 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3110 fillComplete (
const Teuchos::RCP<const map_type>& domainMap,
3111 const Teuchos::RCP<const map_type>&
rangeMap,
3112 const Teuchos::RCP<Teuchos::ParameterList>&
params)
3116 const bool verbose = verbose_;
3118 std::unique_ptr<std::string>
prefix;
3120 prefix = this->createPrefix(
"CrsGraph",
"fillComplete");
3121 std::ostringstream
os;
3123 std::cerr <<
os.str();
3127 (! isFillActive () || isFillComplete (), std::runtime_error,
3128 "Graph fill state must be active (isFillActive() "
3129 "must be true) before calling fillComplete().");
3131 const int numProcs = getComm ()->getSize ();
3139 if (!
params.is_null ()) {
3140 if (
params->isParameter (
"sort column map ghost gids")) {
3141 sortGhostsAssociatedWithEachProcessor_ =
3142 params->get<
bool> (
"sort column map ghost gids",
3143 sortGhostsAssociatedWithEachProcessor_);
3145 else if (
params->isParameter (
"Sort column Map ghost GIDs")) {
3146 sortGhostsAssociatedWithEachProcessor_ =
3147 params->get<
bool> (
"Sort column Map ghost GIDs",
3148 sortGhostsAssociatedWithEachProcessor_);
3155 if (!
params.is_null ()) {
3163 if (! indicesAreAllocated ()) {
3166 allocateIndices (LocalIndices, verbose);
3169 allocateIndices (GlobalIndices, verbose);
3186 std::ostringstream
os;
3187 os << *
prefix <<
"Do not need to call globalAssemble; "
3188 "assertNoNonlocalInserts="
3192 std::cerr <<
os.str();
3197 std::ostringstream
os;
3199 Details::Impl::verbosePrintMap(
3200 os, nonlocals_.begin(), nonlocals_.end(),
3201 nonlocals_.size(),
"nonlocals_");
3202 std::cerr <<
os.str() <<
endl;
3206 auto map = this->getMap();
3207 auto comm =
map.is_null() ? Teuchos::null :
map->getComm();
3209 if (! comm.is_null()) {
3210 using Teuchos::REDUCE_MAX;
3211 using Teuchos::reduceAll;
3218 "least one process in the CrsGraph's communicator. This "
3219 "means either that you incorrectly set the "
3220 "\"No Nonlocal Changes\" fillComplete parameter to true, "
3221 "or that you inserted invalid entries. "
3222 "Rerun with the environment variable TPETRA_VERBOSE="
3223 "CrsGraph set to see the entries of nonlocals_ on every "
3224 "MPI process (WARNING: lots of output).");
3229 "nonlocals_.size()=" <<
numNonlocals <<
" != 0 on the "
3230 "calling process. This means either that you incorrectly "
3231 "set the \"No Nonlocal Changes\" fillComplete parameter "
3232 "to true, or that you inserted invalid entries. "
3233 "Rerun with the environment "
3234 "variable TPETRA_VERBOSE=CrsGraph set to see the entries "
3235 "of nonlocals_ on every MPI process (WARNING: lots of "
3248 Teuchos::Array<int> remotePIDs (0);
3251 this->makeColMap (remotePIDs);
3257 this->makeIndicesLocal(verbose);
3262 using Teuchos::REDUCE_MIN;
3263 using Teuchos::reduceAll;
3264 using Teuchos::outArg;
3268 if (!
map.is_null ()) {
3269 comm =
map->getComm ();
3271 if (comm.is_null ()) {
3281 std::ostringstream
os;
3284 (
true, std::runtime_error,
os.str ());
3301 this->sortAndMergeAllIndices (this->isSorted (), this->isMerged ());
3309 this->fillLocalGraph (
params);
3312 params->get (
"compute global constants",
true);
3314 this->computeGlobalConstants ();
3317 this->computeLocalConstants ();
3319 this->fillComplete_ =
true;
3320 this->checkInternalState ();
3323 std::ostringstream
os;
3325 std::cerr <<
os.str();
3330 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3334 const Teuchos::RCP<const map_type>&
rangeMap,
3335 const Teuchos::RCP<const import_type>&
importer,
3336 const Teuchos::RCP<const export_type>&
exporter,
3337 const Teuchos::RCP<Teuchos::ParameterList>&
params)
3340#ifdef HAVE_TPETRA_MMM_TIMINGS
3343 label =
params->get(
"Timer Label",label);
3344 std::string
prefix = std::string(
"Tpetra ")+ label + std::string(
": ");
3345 using Teuchos::TimeMonitor;
3346 Teuchos::RCP<Teuchos::TimeMonitor>
MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
prefix + std::string(
"ESFC-G-Setup"))));
3352 std::runtime_error,
"The input domain Map and range Map must be nonnull.");
3354 isFillComplete () || ! hasColMap (), std::runtime_error,
"You may not "
3355 "call this method unless the graph has a column Map.");
3357 getLocalNumRows () > 0 && rowPtrsUnpacked_host_.extent (0) == 0,
3358 std::runtime_error,
"The calling process has getLocalNumRows() = "
3359 << getLocalNumRows () <<
" > 0 rows, but the row offsets array has not "
3362 static_cast<size_t> (rowPtrsUnpacked_host_.extent (0)) != getLocalNumRows () + 1,
3363 std::runtime_error,
"The row offsets array has length " <<
3364 rowPtrsUnpacked_host_.extent (0) <<
" != getLocalNumRows()+1 = " <<
3365 (getLocalNumRows () + 1) <<
".");
3380 numAllocForAllRows_ = 0;
3381 k_numAllocPerRow_ =
decltype (k_numAllocPerRow_) ();
3382 indicesAreAllocated_ =
true;
3387 indicesAreLocal_ =
true;
3388 indicesAreGlobal_ =
false;
3391#ifdef HAVE_TPETRA_MMM_TIMINGS
3393 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
prefix + std::string(
"ESFC-G-Maps"))));
3398 indicesAreSorted_ =
true;
3399 noRedundancies_ =
true;
3402#ifdef HAVE_TPETRA_MMM_TIMINGS
3404 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
prefix + std::string(
"ESFC-G-mIXcheckI"))));
3407 importer_ = Teuchos::null;
3408 exporter_ = Teuchos::null;
3411 !
importer->getSourceMap ()->isSameAs (*getDomainMap ()) ||
3412 !
importer->getTargetMap ()->isSameAs (*getColMap ()),
3413 std::invalid_argument,
": importer does not match matrix maps.");
3418#ifdef HAVE_TPETRA_MMM_TIMINGS
3420 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
prefix + std::string(
"ESFC-G-mIXcheckE"))));
3425 !
exporter->getSourceMap ()->isSameAs (*getRowMap ()) ||
3426 !
exporter->getTargetMap ()->isSameAs (*getRangeMap ()),
3427 std::invalid_argument,
": exporter does not match matrix maps.");
3431#ifdef HAVE_TPETRA_MMM_TIMINGS
3433 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
prefix + std::string(
"ESFC-G-mIXmake"))));
3435 Teuchos::Array<int> remotePIDs (0);
3436 this->makeImportExport (remotePIDs,
false);
3438#ifdef HAVE_TPETRA_MMM_TIMINGS
3440 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
prefix + std::string(
"ESFC-G-fLG"))));
3442 this->fillLocalGraph (
params);
3445 params->get (
"compute global constants",
true);
3448#ifdef HAVE_TPETRA_MMM_TIMINGS
3450 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
prefix + std::string(
"ESFC-G-cGC (const)"))));
3452 this->computeGlobalConstants ();
3455#ifdef HAVE_TPETRA_MMM_TIMINGS
3457 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
prefix + std::string(
"ESFC-G-cGC (noconst)"))));
3459 this->computeLocalConstants ();
3462 fillComplete_ =
true;
3464#ifdef HAVE_TPETRA_MMM_TIMINGS
3466 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
prefix + std::string(
"ESFC-G-cIS"))));
3468 checkInternalState ();
3472 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3477 using ::Tpetra::Details::computeOffsetsFromCounts;
3479 typedef typename local_graph_device_type::row_map_type
row_map_type;
3481 typedef typename local_graph_device_type::entries_type::non_const_type
lclinds_1d_type;
3482 const char tfecfFuncName[] =
"fillLocalGraph (called from fillComplete or "
3483 "expertStaticFillComplete): ";
3484 const size_t lclNumRows = this->getLocalNumRows ();
3490 if (!
params.is_null () && !
params->get (
"Optimize Storage",
true)) {
3500 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3501 (rowPtrsUnpacked_host_.extent (0) == 0, std::logic_error,
3502 "k_rowPtrs_ has size zero, but shouldn't");
3503 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3504 (rowPtrsUnpacked_host_.extent (0) != lclNumRows + 1, std::logic_error,
3505 "rowPtrsUnpacked_host_.extent(0) = "
3506 << rowPtrsUnpacked_host_.extent (0) <<
" != (lclNumRows + 1) = "
3507 << (lclNumRows + 1) <<
".");
3508 const size_t numOffsets = rowPtrsUnpacked_host_.extent (0);
3509 const auto valToCheck = rowPtrsUnpacked_host_(numOffsets-1);
3510 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3512 lclIndsUnpacked_wdv.extent (0) != valToCheck,
3513 std::logic_error,
"numOffsets=" << numOffsets <<
" != 0 "
3514 " and lclIndsUnpacked_wdv.extent(0)=" << lclIndsUnpacked_wdv.extent(0)
3515 <<
" != k_rowPtrs_(" << numOffsets <<
")=" << valToCheck
3519 size_t allocSize = 0;
3521 allocSize = this->getLocalAllocationSize ();
3523 catch (std::logic_error& e) {
3524 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3525 (
true, std::logic_error,
"getLocalAllocationSize threw "
3526 "std::logic_error: " << e.what ());
3528 catch (std::runtime_error& e) {
3529 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3530 (
true, std::runtime_error,
"getLocalAllocationSize threw "
3531 "std::runtime_error: " << e.what ());
3533 catch (std::exception& e) {
3534 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3535 (
true, std::runtime_error,
"getLocalAllocationSize threw "
3536 "std::exception: " << e.what ());
3539 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3540 (
true, std::runtime_error,
"getLocalAllocationSize threw "
3541 "an exception not a subclass of std::exception.");
3544 if (this->getLocalNumEntries () != allocSize) {
3547 non_const_row_map_type ptr_d;
3548 row_map_type ptr_d_const;
3557 if (rowPtrsUnpacked_host_.extent (0) != 0) {
3558 const size_t numOffsets =
3559 static_cast<size_t> (rowPtrsUnpacked_host_.extent (0));
3560 const auto valToCheck = rowPtrsUnpacked_host_(numOffsets - 1);
3561 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3562 (valToCheck !=
size_t(lclIndsUnpacked_wdv.extent(0)),
3563 std::logic_error,
"(Unpacked branch) Before allocating "
3564 "or packing, k_rowPtrs_(" << (numOffsets-1) <<
")="
3565 << valToCheck <<
" != lclIndsUnpacked_wdv.extent(0)="
3566 << lclIndsUnpacked_wdv.extent (0) <<
".");
3576 size_t lclTotalNumEntries = 0;
3580 non_const_row_map_type (
"Tpetra::CrsGraph::ptr", lclNumRows + 1);
3581 ptr_d_const = ptr_d;
3585 typename row_entries_type::const_type numRowEnt_h = k_numRowEntries_;
3587 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3588 (
size_t(numRowEnt_h.extent (0)) != lclNumRows,
3589 std::logic_error,
"(Unpacked branch) "
3590 "numRowEnt_h.extent(0)=" << numRowEnt_h.extent(0)
3591 <<
" != getLocalNumRows()=" << lclNumRows <<
"");
3597 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3598 (
static_cast<size_t> (ptr_d.extent (0)) != lclNumRows + 1,
3599 std::logic_error,
"(Unpacked branch) After allocating "
3600 "ptr_d, ptr_d.extent(0) = " << ptr_d.extent(0)
3601 <<
" != lclNumRows+1 = " << (lclNumRows+1) <<
".");
3602 const auto valToCheck =
3603 ::Tpetra::Details::getEntryOnHost (ptr_d, lclNumRows);
3604 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3605 (valToCheck != lclTotalNumEntries, std::logic_error,
3606 "Tpetra::CrsGraph::fillLocalGraph: In unpacked branch, "
3607 "after filling ptr_d, ptr_d(lclNumRows=" << lclNumRows
3608 <<
") = " << valToCheck <<
" != total number of entries "
3609 "on the calling process = " << lclTotalNumEntries
3615 lclinds_1d_type ind_d =
3616 lclinds_1d_type (
"Tpetra::CrsGraph::lclInd", lclTotalNumEntries);
3628 typedef pack_functor<
3629 typename local_graph_device_type::entries_type::non_const_type,
3630 typename local_inds_dualv_type::t_dev::const_type,
3632 typename local_graph_device_type::row_map_type> inds_packer_type;
3633 inds_packer_type f (ind_d,
3634 lclIndsUnpacked_wdv.getDeviceView(Access::ReadOnly),
3635 ptr_d, rowPtrsUnpacked_dev_);
3637 typedef typename decltype (ind_d)::execution_space exec_space;
3638 typedef Kokkos::RangePolicy<exec_space, LocalOrdinal> range_type;
3639 Kokkos::parallel_for (range_type (0, lclNumRows), f);
3643 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3644 (ptr_d.extent (0) == 0, std::logic_error,
3645 "(\"Optimize Storage\"=true branch) After packing, "
3646 "ptr_d.extent(0)=0. This probably means k_rowPtrs_ was "
3647 "never allocated.");
3648 if (ptr_d.extent (0) != 0) {
3649 const size_t numOffsets =
static_cast<size_t> (ptr_d.extent (0));
3650 const auto valToCheck =
3651 ::Tpetra::Details::getEntryOnHost (ptr_d, numOffsets - 1);
3652 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3653 (
static_cast<size_t> (valToCheck) != ind_d.extent (0),
3654 std::logic_error,
"(\"Optimize Storage\"=true branch) "
3655 "After packing, ptr_d(" << (numOffsets-1) <<
")="
3656 << valToCheck <<
" != ind_d.extent(0)="
3657 << ind_d.extent(0) <<
".");
3661 if (requestOptimizedStorage)
3662 setRowPtrs(ptr_d_const);
3664 setRowPtrsPacked(ptr_d_const);
3665 lclIndsPacked_wdv = local_inds_wdv_type(ind_d);
3669 rowPtrsPacked_dev_ = rowPtrsUnpacked_dev_;
3670 rowPtrsPacked_host_ = rowPtrsUnpacked_host_;
3671 lclIndsPacked_wdv = lclIndsUnpacked_wdv;
3674 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3675 (rowPtrsPacked_dev_.extent (0) == 0, std::logic_error,
3676 "(\"Optimize Storage\"=false branch) "
3677 "rowPtrsPacked_dev_.extent(0) = 0. "
3678 "This probably means that "
3679 "k_rowPtrs_ was never allocated.");
3680 if (rowPtrsPacked_dev_.extent (0) != 0) {
3681 const size_t numOffsets =
3682 static_cast<size_t> (rowPtrsPacked_dev_.extent (0));
3683 const size_t valToCheck =
3684 rowPtrsPacked_host_(numOffsets - 1);
3685 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3686 (valToCheck !=
size_t(lclIndsPacked_wdv.extent (0)),
3687 std::logic_error,
"(\"Optimize Storage\"=false branch) "
3688 "rowPtrsPacked_dev_(" << (numOffsets-1) <<
")="
3690 <<
" != lclIndsPacked_wdv.extent(0)="
3691 << lclIndsPacked_wdv.extent (0) <<
".");
3697 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3698 (
static_cast<size_t> (rowPtrsPacked_dev_.extent (0)) != lclNumRows + 1,
3699 std::logic_error,
"After packing, rowPtrsPacked_dev_.extent(0) = " <<
3700 rowPtrsPacked_dev_.extent (0) <<
" != lclNumRows+1 = " << (lclNumRows+1)
3702 if (rowPtrsPacked_dev_.extent (0) != 0) {
3703 const size_t numOffsets =
static_cast<size_t> (rowPtrsPacked_dev_.extent (0));
3704 const auto valToCheck = rowPtrsPacked_host_(numOffsets - 1);
3705 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3706 (
static_cast<size_t> (valToCheck) != lclIndsPacked_wdv.extent (0),
3707 std::logic_error,
"After packing, rowPtrsPacked_dev_(" << (numOffsets-1)
3708 <<
") = " << valToCheck <<
" != lclIndsPacked_wdv.extent(0) = "
3709 << lclIndsPacked_wdv.extent (0) <<
".");
3713 if (requestOptimizedStorage) {
3719 k_numRowEntries_ = row_entries_type ();
3722 lclIndsUnpacked_wdv = lclIndsPacked_wdv;
3724 storageStatus_ = Details::STORAGE_1D_PACKED;
3727 set_need_sync_host_uvm_access();
3730 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3744 isLocallyIndexed () || isGloballyIndexed (), std::runtime_error,
3745 "Requires matching maps and non-static graph.");
3749 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3753 const Teuchos::RCP<const import_type>&
newImport,
3756 using Teuchos::REDUCE_MIN;
3757 using Teuchos::reduceAll;
3761 typedef typename local_inds_dualv_type::t_host
col_inds_type;
3765 isFillComplete (), std::runtime_error,
"The graph is fill complete "
3766 "(isFillComplete() returns true). You must call resumeFill() before "
3767 "you may call this method.");
3785 const LO
lclNumRows =
static_cast<LO
> (this->getLocalNumRows ());
3814 auto oldLclInds1D = lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
3819 if (indicesAreAllocated ()) {
3820 if (isLocallyIndexed ()) {
3824 const size_t allocSize = this->getLocalAllocationSize ();
3834 if (
oldLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
3843 if (
gblCol == Teuchos::OrdinalTraits<GO>::invalid ()) {
3849 if (
newLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
3884 for (
size_t k = 0;
k <
rowInfo.numEntries; ++
k) {
3903 getRowMap ().is_null () ? Teuchos::null : getRowMap ()->getComm ();
3904 if (! comm.is_null ()) {
3909 gblSuccess[0] == 0, std::runtime_error,
"It is not possible to continue."
3910 " The most likely reason is that the graph is locally indexed, but the "
3911 "column Map is missing (null) on some processes, due to a previous call "
3912 "to replaceColMap().");
3915 gblSuccess[1] == 0, std::runtime_error,
"On some process, the graph "
3916 "contains column indices that are in the old column Map, but not in the "
3917 "new column Map (on that process). This method does NOT redistribute "
3918 "data; it does not claim to do the work of an Import or Export operation."
3919 " This means that for all processess, the calling process MUST own all "
3920 "column indices, in both the old column Map and the new column Map. In "
3921 "this case, you will need to do an Import or Export operation to "
3922 "redistribute data.");
3925 if (isLocallyIndexed ()) {
3928 Kokkos::view_alloc(
"Tpetra::CrsGraph::lclIndReindexed",
3929 Kokkos::WithoutInitializing),
3941 indicesAreSorted_ =
false;
3949 const bool sorted =
false;
3950 const bool merged =
true;
3964 if (! domainMap_.is_null ()) {
3965 if (! domainMap_->isSameAs (*
newColMap)) {
3968 importer_ = Teuchos::null;
3977 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3982 const char prefix[] =
"Tpetra::CrsGraph::replaceDomainMap: ";
3984 colMap_.is_null (), std::invalid_argument,
prefix <<
"You may not call "
3985 "this method unless the graph already has a column Map.");
3988 prefix <<
"The new domain Map must be nonnull.");
3991 Teuchos::RCP<const import_type>
newImporter = Teuchos::null;
3998 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4002 const Teuchos::RCP<const import_type>&
newImporter)
4004 const char prefix[] =
"Tpetra::CrsGraph::replaceDomainMapAndImporter: ";
4006 colMap_.is_null (), std::invalid_argument,
prefix <<
"You may not call "
4007 "this method unless the graph already has a column Map.");
4010 prefix <<
"The new domain Map must be nonnull.");
4020 (!
colSameAsDom, std::invalid_argument,
"If the new Import is null, "
4021 "then the new domain Map must be the same as the current column Map.");
4025 colMap_->isSameAs (* (
newImporter->getTargetMap ()));
4030 "new Import is nonnull, then the current column Map must be the same "
4031 "as the new Import's target Map, and the new domain Map must be the "
4032 "same as the new Import's source Map.");
4037 importer_ = Teuchos::rcp_const_cast<import_type> (
newImporter);
4040 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4045 const char prefix[] =
"Tpetra::CrsGraph::replaceRangeMap: ";
4047 rowMap_.is_null (), std::invalid_argument,
prefix <<
"You may not call "
4048 "this method unless the graph already has a row Map.");
4051 prefix <<
"The new range Map must be nonnull.");
4054 Teuchos::RCP<const export_type>
newExporter = Teuchos::null;
4061 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4065 const Teuchos::RCP<const export_type>&
newExporter)
4067 const char prefix[] =
"Tpetra::CrsGraph::replaceRangeMapAndExporter: ";
4069 rowMap_.is_null (), std::invalid_argument,
prefix <<
"You may not call "
4070 "this method unless the graph already has a column Map.");
4073 prefix <<
"The new domain Map must be nonnull.");
4083 (!
rowSameAsRange, std::invalid_argument,
"If the new Export is null, "
4084 "then the new range Map must be the same as the current row Map.");
4090 rowMap_->isSameAs (* (
newExporter->getSourceMap ()));
4093 "new Export is nonnull, then the current row Map must be the same "
4094 "as the new Export's source Map, and the new range Map must be the "
4095 "same as the new Export's target Map.");
4100 exporter_ = Teuchos::rcp_const_cast<export_type> (
newExporter);
4104 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4110 lclIndsPacked_wdv.getDeviceView(Access::ReadWrite),
4111 rowPtrsPacked_dev_);
4114 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4119 return local_graph_host_type(
4120 lclIndsPacked_wdv.getHostView(Access::ReadWrite),
4121 rowPtrsPacked_host_);
// computeGlobalConstants (name taken from the profiling label below).
// Ensures the local constants are computed, then -- only if the global
// constants are not yet available -- fills globalNumEntries_ and
// globalMaxNumRowEntries_ and marks them as available.
4124 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4129 using ::Tpetra::Details::ProfilingRegion;
4130 using Teuchos::ArrayView;
4131 using Teuchos::outArg;
4132 using Teuchos::reduceAll;
4135 ProfilingRegion
regionCGC (
"Tpetra::CrsGraph::computeGlobalConstants");
// Local constants are always (re)checked first.
4137 this->computeLocalConstants ();
// Skip the global work when the results are already cached.
4142 if (! this->haveGlobalConstants_) {
4143 const Teuchos::Comm<int>& comm = * (this->getComm ());
// Local contribution: this process's number of entries.
4157 lcl =
static_cast<GST> (this->getLocalNumEntries ());
4160 this->globalNumEntries_ =
gbl;
// Output argument of a call whose beginning is not visible here;
// presumably a reduceAll over the per-process maximum -- confirm.
4164 outArg (this->globalMaxNumRowEntries_));
4165 this->haveGlobalConstants_ =
true;
// computeLocalConstants (name taken from the profiling label below).
// Computes nodeMaxNumRowEntries_ from the packed device row pointers and
// marks the local constants as available.
4170 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4175 using ::Tpetra::Details::ProfilingRegion;
4177 ProfilingRegion
regionCLC (
"Tpetra::CrsGraph::computeLocalConstants");
// Already computed: the body of this branch is not visible here
// (presumably an early return -- confirm).
4178 if (this->haveLocalConstants_) {
// Reset to the "invalid" sentinel before recomputing.
4183 this->nodeMaxNumRowEntries_ =
4184 Teuchos::OrdinalTraits<size_t>::invalid();
4188 auto ptr = this->rowPtrsPacked_dev_;
// Row count derived from the row-pointer array: 0, or extent(0) - 1.
4190 static_cast<LO
> (0) :
4191 (
static_cast<LO
> (
ptr.extent(0)) -
static_cast<LO
> (1));
// The string is the label passed to maxDifference.
4194 ::Tpetra::Details::maxDifference (
"Tpetra::CrsGraph: nodeMaxNumRowEntries",
4197 this->haveLocalConstants_ =
true;
// makeIndicesLocal (name taken from the createPrefix call below).
// Converts the graph's column indices from global to local using the
// column Map, frees the global-index storage and flips the indexing flags.
// Returns a std::pair<size_t, std::string>; on the visible error path the
// first member is the "invalid" size_t sentinel.
// Reports std::logic_error when the graph has no column Map.
4201 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4202 std::pair<size_t, std::string>
4207 using Teuchos::arcp;
4208 using Teuchos::Array;
4213 typedef typename local_graph_device_type::row_map_type::non_const_value_type offset_type;
4215 typedef typename row_entries_type::non_const_value_type
num_ent_type;
// Debug output: prefix string and number of local rows, sent to std::cerr.
4219 std::unique_ptr<std::string>
prefix;
4221 prefix = this->createPrefix(
"CrsGraph",
"makeIndicesLocal");
4222 std::ostringstream
os;
4223 os << *
prefix <<
"lclNumRows: " << getLocalNumRows() <<
endl;
4224 std::cerr <<
os.str();
// A column Map is mandatory; both checks report std::logic_error.
4230 (! this->hasColMap (), std::logic_error,
"The graph does not have a "
4231 "column Map yet. This method should never be called in that case. "
4232 "Please report this bug to the Tpetra developers.");
4234 (this->getColMap ().
is_null (), std::logic_error,
"The graph claims "
4235 "that it has a column Map, because hasColMap() returns true. However, "
4236 "the result of getColMap() is null. This should never happen. Please "
4237 "report this bug to the Tpetra developers.");
4245 const LO
lclNumRows =
static_cast<LO
> (this->getLocalNumRows ());
// Conversion is only needed for a globally indexed graph with local rows.
4248 if (this->isGloballyIndexed () &&
lclNumRows != 0) {
4250 typename row_entries_type::const_type
h_numRowEnt =
4251 this->k_numRowEntries_;
// Error path: no row pointers.
// NOTE(review): the message names k_rowPtrs_, but the test reads
// rowPtrsUnpacked_host_.
4254 if (rowPtrsUnpacked_host_.extent (0) == 0) {
4255 errStrm <<
"k_rowPtrs_.extent(0) == 0. This should never "
4256 "happen here. Please report this bug to the Tpetra developers."
4259 return std::make_pair(Tpetra::Details::OrdinalTraits<size_t>::invalid (),
4271 using Kokkos::view_alloc;
4272 using Kokkos::WithoutInitializing;
// Label for the local-index storage.
4282 const std::string label (
"Tpetra::CrsGraph::lclInd");
4284 std::ostringstream
os;
4285 os << *
prefix <<
"(Re)allocate lclInd_wdv: old="
4286 << lclIndsUnpacked_wdv.extent(0) <<
", new=" <<
numEnt <<
endl;
4287 std::cerr <<
os.str();
4304 std::ostringstream
os;
4305 os << *
prefix <<
"Allocate device mirror k_numRowEnt: "
4307 std::cerr <<
os.str();
// Arguments of the global-to-local conversion: the local indices are
// overwritten entirely, the global indices are only read.
4312 using ::Tpetra::Details::convertColumnIndicesFromGlobalToLocal;
4315 lclIndsUnpacked_wdv.getDeviceView(Access::OverwriteAll),
4316 gblInds_wdv.getDeviceView(Access::ReadOnly),
4317 rowPtrsUnpacked_dev_,
// Rank used in the error message; 0 when the communicator is null.
4321 const int myRank = [
this] () {
4322 auto map = this->getMap ();
4323 if (
map.is_null ()) {
4327 auto comm =
map->getComm ();
4328 return comm.is_null () ? 0 : comm->getRank ();
4332 errStrm <<
"(Process " <<
myRank <<
") When converting column "
4333 "indices from global to local, we encountered " <<
lclNumErrs
4336 <<
" not live in the column Map on this process." <<
endl;
// Release the global-index storage by assigning an empty object.
4343 std::ostringstream
os;
4344 os << *
prefix <<
"Free gblInds_wdv: "
4345 << gblInds_wdv.extent(0) <<
endl;
4346 std::cerr <<
os.str();
4348 gblInds_wdv = global_inds_wdv_type ();
// From here on the graph reports itself as locally indexed.
4351 this->indicesAreLocal_ =
true;
4352 this->indicesAreGlobal_ =
false;
4353 this->checkInternalState ();
// Works on a local copy of colMap_ and reads
// sortGhostsAssociatedWithEachProcessor_; if any process reported an error,
// throws std::runtime_error carrying all processes' messages, and finally
// calls checkInternalState().
// NOTE(review): the function name is not visible in this listing
// (presumably the column-Map construction routine) -- confirm.
4358 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4368 std::unique_ptr<std::string>
prefix;
4371 std::ostringstream
os;
4373 std::cerr <<
os.str();
4381 Teuchos::RCP<const map_type>
colMap = this->colMap_;
4383 this->sortGhostsAssociatedWithEachProcessor_;
4393 using Teuchos::outArg;
4394 using Teuchos::REDUCE_MIN;
4395 using Teuchos::reduceAll;
// The error check across processes needs a communicator.
4401 auto comm = this->getComm ();
4402 if (! comm.is_null ()) {
4408 std::ostringstream
os;
// Unconditional throw (condition is literally `true`) once this point is
// reached.
4411 (
true, std::runtime_error,
": An error happened on at "
4412 "least one process in the CrsGraph's communicator. "
4413 "Here are all processes' error messages:" << std::endl
4427 checkInternalState ();
4429 std::ostringstream
os;
4431 std::cerr <<
os.str();
// sortAndMergeAllIndices (name taken from the label string below).
// If the rows are not yet both sorted and merged, runs
// sortAndMergeRowIndices on every local row with a host-side Kokkos range
// policy, then records that indices are sorted and free of redundancies.
// Reports std::logic_error if the graph is globally indexed, or if it is
// storage optimized while `merged` is false.
4436 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// Execution space taken from the host mirror of a device view.
4443 using host_execution_space =
4444 typename Kokkos::View<LO*, device_type>::HostMirror::
4446 using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
4449 (
"Tpetra::CrsGraph::sortAndMergeAllIndices");
4451 std::unique_ptr<std::string>
prefix;
4454 std::ostringstream
os;
4456 <<
"sorted=" << (
sorted ?
"true" :
"false")
4457 <<
", merged=" << (
merged ?
"true" :
"false") <<
endl;
4458 std::cerr <<
os.str();
4460 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4461 (this->isGloballyIndexed(), std::logic_error,
4462 "This method may only be called after makeIndicesLocal." );
4463 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4464 (! merged && this->isStorageOptimized(), std::logic_error,
4465 "The graph is already storage optimized, so we shouldn't be "
4466 "merging any indices. "
4467 "Please report this bug to the Tpetra developers.");
// Nothing to do when the rows are already sorted and merged.
4469 if (! sorted || ! merged) {
4470 const LO lclNumRows(this->getLocalNumRows());
4471 auto range = range_type(0, lclNumRows);
// Variant 1: reduction that also totals the per-row return values of
// sortAndMergeRowIndices so the total can be printed.
4474 size_t totalNumDups = 0;
4475 Kokkos::parallel_reduce(range,
4476 [
this, sorted, merged] (
const LO lclRow,
size_t& numDups)
4478 const RowInfo rowInfo = this->getRowInfo(lclRow);
4479 numDups += this->sortAndMergeRowIndices(rowInfo, sorted, merged);
4482 std::ostringstream os;
4483 os << *prefix <<
"totalNumDups=" << totalNumDups << endl;
4484 std::cerr << os.str();
// Variant 2: same per-row work without collecting the count.  The
// condition choosing between the two variants is not visible here.
4487 Kokkos::parallel_for(range,
4488 [
this, sorted, merged] (
const LO lclRow)
4490 const RowInfo rowInfo = this->getRowInfo(lclRow);
4491 this->sortAndMergeRowIndices(rowInfo, sorted, merged);
4494 this->indicesAreSorted_ =
true;
4495 this->noRedundancies_ =
true;
4499 std::ostringstream os;
4500 os << *prefix <<
"Done" << endl;
4501 std::cerr << os.str();
// makeImportExport (name taken from the profiling label below).
// Creates the Import and Export objects when they are still null and the
// corresponding pair of Maps differs.  Reports std::logic_error if the
// graph has no column Map.
4505 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4511 using ::Tpetra::Details::ProfilingRegion;
4512 using Teuchos::ParameterList;
4516 ProfilingRegion
regionMIE (
"Tpetra::CrsGraph::makeImportExport");
4519 (! this->hasColMap (), std::logic_error,
4520 "This method may not be called unless the graph has a column Map.");
// Import: only built when none exists yet.
4530 if (importer_.is_null ()) {
// Cheap pointer comparison first, then the full isSameAs test.
4532 if (domainMap_ != colMap_ && (! domainMap_->isSameAs (*colMap_))) {
// No "Import" sublist supplied: construct without extra parameters.
4533 if (
params.is_null () || !
params->isSublist (
"Import")) {
4535 importer_ =
rcp (
new import_type (domainMap_, colMap_, remotePIDs));
// Export: same pattern with the range and row Maps.
4558 if (exporter_.is_null ()) {
4560 if (rangeMap_ != rowMap_ && ! rangeMap_->isSameAs (*rowMap_)) {
4561 if (
params.is_null () || !
params->isSublist (
"Export")) {
// Assembles a one-line textual summary in a string stream: the base-class
// description followed by the fill status and global sizes.
// NOTE(review): the function name is not visible in this listing
// (presumably description()) -- confirm.
4573 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4578 std::ostringstream
oss;
4579 oss << dist_object_type::description ();
// Fill-complete graphs additionally report columns and entry count.
4580 if (isFillComplete ()) {
4581 oss <<
"{status = fill complete"
4582 <<
", global rows = " << getGlobalNumRows()
4583 <<
", global cols = " << getGlobalNumCols()
4584 <<
", global num entries = " << getGlobalNumEntries()
4588 oss <<
"{status = fill not complete"
4589 <<
", global rows = " << getGlobalNumRows()
// Prints the graph to `out` with a level of detail selected by verbLevel:
// the description, the Maps, per-process entry counts and, at the most
// detailed level visible here, the column indices of each row.
// NOTE(review): the function name is not visible in this listing
// (presumably describe()) -- confirm.
4596 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4600 const Teuchos::EVerbosityLevel
verbLevel)
const
4602 using Teuchos::ArrayView;
4603 using Teuchos::Comm;
4605 using Teuchos::VERB_DEFAULT;
4606 using Teuchos::VERB_NONE;
4607 using Teuchos::VERB_LOW;
4608 using Teuchos::VERB_MEDIUM;
4609 using Teuchos::VERB_HIGH;
4610 using Teuchos::VERB_EXTREME;
// Column width: loop over powers of ten below the global row count, then
// enforce a minimum of 11 and add 2.
4620 for (
size_t dec=10;
dec<getGlobalNumRows();
dec *= 10) {
4623 width = std::max<size_t> (
width,
static_cast<size_t> (11)) + 2;
4624 Teuchos::OSTab
tab (
out);
// Only process 0 prints the global summary lines.
4633 if (
myImageID == 0)
out << this->description() << std::endl;
4635 if (isFillComplete() &&
myImageID == 0) {
4636 out <<
"Global max number of row entries = " << globalMaxNumRowEntries_ << std::endl;
// Each Map describes itself; the optional Maps are skipped when null.
4641 rowMap_->describe(
out,
vl);
4642 if (colMap_ != Teuchos::null) {
4643 if (
myImageID == 0)
out <<
"\nColumn map: " << std::endl;
4644 colMap_->describe(
out,
vl);
4646 if (domainMap_ != Teuchos::null) {
4647 if (
myImageID == 0)
out <<
"\nDomain map: " << std::endl;
4648 domainMap_->describe(
out,
vl);
4650 if (rangeMap_ != Teuchos::null) {
4651 if (
myImageID == 0)
out <<
"\nRange map: " << std::endl;
4652 rangeMap_->describe(
out,
vl);
// Per-process statistics.
4660 <<
"Node number of entries = " << this->getLocalNumEntries () << std::endl
4661 <<
"Node max number of entries = " << nodeMaxNumRowEntries_ << std::endl;
4662 if (! indicesAreAllocated ()) {
4663 out <<
"Indices are not allocated." << std::endl;
// Table header for the per-row listing.
4675 out << std::setw(
width) <<
"Node ID"
4676 << std::setw(
width) <<
"Global Row"
4677 << std::setw(
width) <<
"Num Entries";
// Row contents come from whichever index storage is active.
4692 if (isGloballyIndexed()) {
4693 auto rowview = gblInds_wdv.getHostView(Access::ReadOnly);
4694 for (
size_t j=0;
j <
rowinfo.numEntries; ++
j){
4699 else if (isLocallyIndexed()) {
4700 auto rowview = lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
4701 for (
size_t j=0;
j <
rowinfo.numEntries; ++
j) {
// Local column indices are printed as global indices via the column Map.
4703 out << colMap_->getGlobalElement(
collid) <<
" ";
4719 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// copyAndPermute (name taken from the createPrefix call below).
// Copies rows from `source` into this graph by inserting their global
// column indices: first the numSameIDs leading rows, then the rows named by
// the permuteFromLIDs / permuteToLIDs pairs.  Padding is computed and
// applied before any insertion.
// Reports std::runtime_error if the two permutation arrays differ in
// length; the reference dynamic_cast of `source` to row_graph_type throws
// std::bad_cast on a type mismatch.
4730 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4735 const size_t numSameIDs,
4736 const Kokkos::DualView<
const local_ordinal_type*,
4738 const Kokkos::DualView<
const local_ordinal_type*,
4743 using LO = local_ordinal_type;
4744 using GO = global_ordinal_type;
4747 const bool verbose = verbose_;
4749 std::unique_ptr<std::string>
prefix;
4751 prefix = this->createPrefix(
"CrsGraph",
"copyAndPermute");
4752 std::ostringstream
os;
4754 std::cerr <<
os.str ();
4757 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4758 (permuteToLIDs.extent (0) != permuteFromLIDs.extent (0),
4759 std::runtime_error,
"permuteToLIDs.extent(0) = "
4760 << permuteToLIDs.extent (0) <<
" != permuteFromLIDs.extent(0) = "
4761 << permuteFromLIDs.extent (0) <<
".");
4765 const row_graph_type& srcRowGraph =
4766 dynamic_cast<const row_graph_type&
> (source);
4769 std::ostringstream os;
4770 os << *prefix <<
"Compute padding" << endl;
4771 std::cerr << os.str ();
// Make room in the target rows before inserting anything.
4773 auto padding = computeCrsPadding(srcRowGraph, numSameIDs,
4774 permuteToLIDs, permuteFromLIDs, verbose);
4775 applyCrsPadding(*padding, verbose);
// Null when the source is a RowGraph that is not a CrsGraph.
4780 const this_CRS_type* srcCrsGraph =
4781 dynamic_cast<const this_CRS_type*
> (&source);
4783 const map_type& srcRowMap = *(srcRowGraph.getRowMap());
4784 const map_type& tgtRowMap = *(getRowMap());
4785 const bool src_filled = srcRowGraph.isFillComplete();
4786 nonconst_global_inds_host_view_type row_copy;
// "Same" rows, copy path: rows are fetched with getGlobalRowCopy into a
// scratch view that is resized per row.
4792 if (src_filled || srcCrsGraph ==
nullptr) {
4794 std::ostringstream os;
4795 os << *prefix <<
"src_filled || srcCrsGraph == nullptr" << endl;
4796 std::cerr << os.str ();
4803 for (
size_t i = 0; i < numSameIDs; ++i, ++myid) {
4804 const GO gid = srcRowMap.getGlobalElement (myid);
4805 size_t row_length = srcRowGraph.getNumEntriesInGlobalRow (gid);
4806 Kokkos::resize(row_copy,row_length);
4807 size_t check_row_length = 0;
4808 srcRowGraph.getGlobalRowCopy (gid, row_copy, check_row_length);
4809 this->insertGlobalIndices (gid, row_length, row_copy.data());
// "Same" rows, view path: the source CrsGraph's rows are read through
// getGlobalRowView, without a copy.
4813 std::ostringstream os;
4814 os << *prefix <<
"! src_filled && srcCrsGraph != nullptr" << endl;
4815 std::cerr << os.str ();
4817 for (
size_t i = 0; i < numSameIDs; ++i, ++myid) {
4818 const GO gid = srcRowMap.getGlobalElement (myid);
4819 global_inds_host_view_type row;
4820 srcCrsGraph->getGlobalRowView (gid, row);
4821 this->insertGlobalIndices (gid, row.extent(0), row.data());
// Permuted rows: same two paths, but source and target rows differ.
4828 auto permuteToLIDs_h = permuteToLIDs.view_host ();
4829 auto permuteFromLIDs_h = permuteFromLIDs.view_host ();
4831 if (src_filled || srcCrsGraph ==
nullptr) {
4832 for (LO i = 0; i < static_cast<LO> (permuteToLIDs_h.extent (0)); ++i) {
4833 const GO mygid = tgtRowMap.getGlobalElement (permuteToLIDs_h[i]);
4834 const GO srcgid = srcRowMap.getGlobalElement (permuteFromLIDs_h[i]);
4835 size_t row_length = srcRowGraph.getNumEntriesInGlobalRow (srcgid);
4836 Kokkos::resize(row_copy,row_length);
4837 size_t check_row_length = 0;
4838 srcRowGraph.getGlobalRowCopy (srcgid, row_copy, check_row_length);
4839 this->insertGlobalIndices (mygid, row_length, row_copy.data());
4842 for (LO i = 0; i < static_cast<LO> (permuteToLIDs_h.extent (0)); ++i) {
4843 const GO mygid = tgtRowMap.getGlobalElement (permuteToLIDs_h[i]);
4844 const GO srcgid = srcRowMap.getGlobalElement (permuteFromLIDs_h[i]);
4845 global_inds_host_view_type row;
4846 srcCrsGraph->getGlobalRowView (srcgid, row);
4847 this->insertGlobalIndices (mygid, row.extent(0), row.data());
4852 std::ostringstream os;
4853 os << *prefix <<
"Done" << endl;
4854 std::cerr << os.str ();
// applyCrsPadding: grows the unpacked row storage according to `padding`.
// Steps visible below: allocate indices if needed, copy the current row
// pointers into row_ptrs_beg, derive per-row end offsets, call
// padCrsArrays on the active index array, refresh k_numRowEntries_ when it
// is in use, and install row_ptrs_beg as the new unpacked row pointers.
4858 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4860 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
4861 applyCrsPadding(
const padding_type& padding,
4864 using Details::ProfilingRegion;
4868 using row_ptrs_type =
4869 typename local_graph_device_type::row_map_type::non_const_type;
4870 using range_policy =
4871 Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LO>>;
4872 const char tfecfFuncName[] =
"applyCrsPadding";
4873 ProfilingRegion regionCAP(
"Tpetra::CrsGraph::applyCrsPadding");
4875 std::unique_ptr<std::string> prefix;
4877 prefix = this->
createPrefix(
"CrsGraph", tfecfFuncName);
4878 std::ostringstream os;
4879 os << *prefix <<
"padding: ";
4882 std::cerr << os.str();
// The rank is only looked up in verbose mode; otherwise it is -1.
4884 const int myRank = ! verbose ? -1 : [&] () {
4885 auto map = this->getMap();
4886 if (map.is_null()) {
4889 auto comm = map->getComm();
4890 if (comm.is_null()) {
4893 return comm->getRank();
// Storage must exist before it can be padded; it is allocated for global
// indices here.
4902 if (! indicesAreAllocated()) {
4904 std::ostringstream os;
4905 os << *prefix <<
"Call allocateIndices" << endl;
4906 std::cerr << os.str();
4908 allocateIndices(GlobalIndices, verbose);
4910 TEUCHOS_ASSERT( indicesAreAllocated() );
4916 std::ostringstream os;
4917 os << *prefix <<
"Allocate row_ptrs_beg: "
4918 << rowPtrsUnpacked_dev_.extent(0) << endl;
4919 std::cerr << os.str();
// Working copy of the row pointers; uninitialized allocation is fine
// because deep_copy overwrites every element.
4921 using Kokkos::view_alloc;
4922 using Kokkos::WithoutInitializing;
4923 row_ptrs_type row_ptrs_beg(
4924 view_alloc(
"row_ptrs_beg", WithoutInitializing),
4925 rowPtrsUnpacked_dev_.extent(0));
4927 Kokkos::deep_copy(execution_space(),row_ptrs_beg, rowPtrsUnpacked_dev_);
// N = number of rows (one fewer than row pointers, or 0 when empty).
4929 const size_t N = row_ptrs_beg.extent(0) == 0 ? size_t(0) :
4930 size_t(row_ptrs_beg.extent(0) - 1);
4932 std::ostringstream os;
4933 os << *prefix <<
"Allocate row_ptrs_end: " << N << endl;
4934 std::cerr << os.str();
4936 row_ptrs_type row_ptrs_end(
4937 view_alloc(
"row_ptrs_end", WithoutInitializing), N);
4938 row_ptrs_type num_row_entries;
// k_numRowEntries_ is only maintained when it is non-empty.
4940 const bool refill_num_row_entries = k_numRowEntries_.extent(0) != 0;
// Fence the execution space before the kernels below read row_ptrs_beg.
4942 execution_space().fence();
// End offsets: begin + entry count when counts are tracked ...
4944 if (refill_num_row_entries) {
4948 row_ptrs_type(view_alloc(
"num_row_entries", WithoutInitializing), N);
4949 Kokkos::deep_copy(num_row_entries, this->k_numRowEntries_);
4950 Kokkos::parallel_for
4951 (
"Fill end row pointers", range_policy(0, N),
4952 KOKKOS_LAMBDA (
const size_t i) {
4953 row_ptrs_end(i) = row_ptrs_beg(i) + num_row_entries(i);
// ... otherwise each row ends where the next one begins.
4960 Kokkos::parallel_for
4961 (
"Fill end row pointers", range_policy(0, N),
4962 KOKKOS_LAMBDA (
const size_t i) {
4963 row_ptrs_end(i) = row_ptrs_beg(i+1);
// Pad whichever index array is currently in use.
4967 if (isGloballyIndexed()) {
4969 padding, myRank, verbose);
4972 padCrsArrays(row_ptrs_beg, row_ptrs_end, lclIndsUnpacked_wdv,
4973 padding, myRank, verbose);
// Recompute the per-row counts from the updated offsets and copy them back.
4976 if (refill_num_row_entries) {
4977 Kokkos::parallel_for
4978 (
"Fill num entries", range_policy(0, N),
4979 KOKKOS_LAMBDA (
const size_t i) {
4980 num_row_entries(i) = row_ptrs_end(i) - row_ptrs_beg(i);
4982 Kokkos::deep_copy(this->k_numRowEntries_, num_row_entries);
// NOTE(review): the debug text still says k_rowPtrs_; the member printed
// is rowPtrsUnpacked_dev_.
4985 std::ostringstream os;
4986 os << *prefix <<
"Reassign k_rowPtrs_; old size: "
4987 << rowPtrsUnpacked_dev_.extent(0) <<
", new size: "
4988 << row_ptrs_beg.extent(0) << endl;
4989 std::cerr << os.str();
4990 TEUCHOS_ASSERT( rowPtrsUnpacked_dev_.extent(0) == row_ptrs_beg.extent(0) );
4993 setRowPtrsUnpacked(row_ptrs_beg);
4995 set_need_sync_host_uvm_access();
// computeCrsPadding (same & permute variant, per the debug label below).
// Allocates a padding_type sized for numSameIDs "same" rows plus one slot
// per permuted row, then fills it through computeCrsPaddingForSameIDs and
// computeCrsPaddingForPermutedIDs.
4998 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5000 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type
5002 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5004 const RowGraph<LocalOrdinal,GlobalOrdinal,Node>& source,
5005 const size_t numSameIDs,
5006 const Kokkos::DualView<
const local_ordinal_type*,
5007 buffer_device_type>& permuteToLIDs,
5008 const Kokkos::DualView<
const local_ordinal_type*,
5009 buffer_device_type>& permuteFromLIDs,
5010 const bool verbose)
const
5015 std::unique_ptr<std::string> prefix;
5018 "computeCrsPadding(same & permute)");
5019 std::ostringstream os;
5020 os << *prefix <<
"{numSameIDs: " << numSameIDs
5021 <<
", numPermutes: " << permuteFromLIDs.extent(0) <<
"}"
5023 std::cerr << os.str();
// Rank of this process, or -1 when no communicator is available.
5026 const int myRank = [&] () {
5027 auto comm = rowMap_.is_null() ? Teuchos::null :
5029 return comm.is_null() ? -1 : comm->getRank();
5031 std::unique_ptr<padding_type> padding(
5032 new padding_type(myRank, numSameIDs,
5033 permuteFromLIDs.extent(0)));
5035 computeCrsPaddingForSameIDs(*padding, source,
5036 static_cast<LO
>(numSameIDs));
5037 computeCrsPaddingForPermutedIDs(*padding, source, permuteToLIDs,
// computeCrsPaddingForSameIDs: for each of the first numSameIDs local rows,
// fetches the global column indices of that row in both the source and this
// (target) graph and hands them to padding.update_same.
5042 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5044 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5045 computeCrsPaddingForSameIDs(
5046 padding_type& padding,
5047 const RowGraph<local_ordinal_type, global_ordinal_type,
5049 const local_ordinal_type numSameIDs)
const
5052 using GO = global_ordinal_type;
5053 using Details::Impl::getRowGraphGlobalRow;
5055 const char tfecfFuncName[] =
"computeCrsPaddingForSameIds";
5057 std::unique_ptr<std::string> prefix;
5058 const bool verbose = verbose_;
5060 prefix = this->
createPrefix(
"CrsGraph", tfecfFuncName);
5061 std::ostringstream os;
5062 os << *prefix <<
"numSameIDs: " << numSameIDs << endl;
5063 std::cerr << os.str();
// Special case for zero rows; the branch body is not visible here.
5066 if (numSameIDs == 0) {
5070 const map_type& srcRowMap = *(source.getRowMap());
5071 const map_type& tgtRowMap = *rowMap_;
// A source that is not a CrsGraph is treated as possibly having duplicates.
5072 using this_CRS_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
5073 const this_CRS_type* srcCrs =
dynamic_cast<const this_CRS_type*
>(&source);
5074 const bool src_is_unique =
5075 srcCrs ==
nullptr ? false : srcCrs->isMerged();
5076 const bool tgt_is_unique = this->isMerged();
// Scratch buffers are declared once, outside the row loop.
5078 std::vector<GO> srcGblColIndsScratch;
5079 std::vector<GO> tgtGblColIndsScratch;
5081 execute_sync_host_uvm_access();
// The same local row index is used for both source and target.
5082 for (LO lclRowInd = 0; lclRowInd < numSameIDs; ++lclRowInd) {
5083 const GO srcGblRowInd = srcRowMap.getGlobalElement(lclRowInd);
5084 const GO tgtGblRowInd = tgtRowMap.getGlobalElement(lclRowInd);
5085 auto srcGblColInds = getRowGraphGlobalRow(
5086 srcGblColIndsScratch, source, srcGblRowInd);
5087 auto tgtGblColInds = getRowGraphGlobalRow(
5088 tgtGblColIndsScratch, *
this, tgtGblRowInd);
5089 padding.update_same(lclRowInd, tgtGblColInds.getRawPtr(),
5090 tgtGblColInds.size(), tgt_is_unique,
5091 srcGblColInds.getRawPtr(),
5092 srcGblColInds.size(), src_is_unique);
5095 std::ostringstream os;
5096 os << *prefix <<
"Done" << endl;
5097 std::cerr << os.str();
// computeCrsPaddingForPermutedIDs: for each (permuteFromLIDs[k],
// permuteToLIDs[k]) pair, fetches the global column indices of the source
// row and of the target row and hands them to padding.update_permute.
// Both DualViews are asserted not to need a host sync before their host
// views are read.
5101 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5103 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5104 computeCrsPaddingForPermutedIDs(
5105 padding_type& padding,
5106 const RowGraph<local_ordinal_type, global_ordinal_type,
5108 const Kokkos::DualView<
const local_ordinal_type*,
5109 buffer_device_type>& permuteToLIDs,
5110 const Kokkos::DualView<
const local_ordinal_type*,
5111 buffer_device_type>& permuteFromLIDs)
const
5114 using GO = global_ordinal_type;
5115 using Details::Impl::getRowGraphGlobalRow;
5117 const char tfecfFuncName[] =
"computeCrsPaddingForPermutedIds";
5119 std::unique_ptr<std::string> prefix;
5120 const bool verbose = verbose_;
5122 prefix = this->
createPrefix(
"CrsGraph", tfecfFuncName);
5123 std::ostringstream os;
5124 os << *prefix <<
"permuteToLIDs.extent(0): "
5125 << permuteToLIDs.extent(0)
5126 <<
", permuteFromLIDs.extent(0): "
5127 << permuteFromLIDs.extent(0) << endl;
5128 std::cerr << os.str();
// Special case for no permutations; the branch body is not visible here.
5131 if (permuteToLIDs.extent(0) == 0) {
5135 const map_type& srcRowMap = *(source.getRowMap());
5136 const map_type& tgtRowMap = *rowMap_;
// A source that is not a CrsGraph is treated as possibly having duplicates.
5137 using this_CRS_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
5138 const this_CRS_type* srcCrs =
dynamic_cast<const this_CRS_type*
>(&source);
5139 const bool src_is_unique =
5140 srcCrs ==
nullptr ? false : srcCrs->isMerged();
5141 const bool tgt_is_unique = this->isMerged();
5143 TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_host() );
5144 auto permuteToLIDs_h = permuteToLIDs.view_host();
5145 TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_host() );
5146 auto permuteFromLIDs_h = permuteFromLIDs.view_host();
// Scratch buffers are declared once, outside the loop.
5148 std::vector<GO> srcGblColIndsScratch;
5149 std::vector<GO> tgtGblColIndsScratch;
5150 const LO numPermutes =
static_cast<LO
>(permuteToLIDs_h.extent(0));
5152 execute_sync_host_uvm_access();
5153 for (LO whichPermute = 0; whichPermute < numPermutes; ++whichPermute) {
5154 const LO srcLclRowInd = permuteFromLIDs_h[whichPermute];
5155 const GO srcGblRowInd = srcRowMap.getGlobalElement(srcLclRowInd);
5156 auto srcGblColInds = getRowGraphGlobalRow(
5157 srcGblColIndsScratch, source, srcGblRowInd);
5158 const LO tgtLclRowInd = permuteToLIDs_h[whichPermute];
5159 const GO tgtGblRowInd = tgtRowMap.getGlobalElement(tgtLclRowInd);
5160 auto tgtGblColInds = getRowGraphGlobalRow(
5161 tgtGblColIndsScratch, *
this, tgtGblRowInd);
5162 padding.update_permute(whichPermute, tgtLclRowInd,
5163 tgtGblColInds.getRawPtr(),
5164 tgtGblColInds.size(), tgt_is_unique,
5165 srcGblColInds.getRawPtr(),
5166 srcGblColInds.size(), src_is_unique);
5170 std::ostringstream os;
5171 os << *prefix <<
"Done" << endl;
5172 std::cerr << os.str();
// computeCrsPaddingForImports: walks the received `imports` buffer row by
// row (numPacketsPerLID gives each row's entry count) and, for every
// imported row, passes the target row's current global column indices and
// the incoming indices to padding->update_import.
// `imports` and `numPacketsPerLID` are synced to host if needed;
// `importLIDs` is asserted to be readable on host already.
5176 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5178 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type
5180 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5181 computeCrsPaddingForImports(
5182 const Kokkos::DualView<
const local_ordinal_type*,
5183 buffer_device_type>& importLIDs,
5184 Kokkos::DualView<packet_type*, buffer_device_type> imports,
5185 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
5186 const bool verbose)
const
5188 using Details::Impl::getRowGraphGlobalRow;
5191 using GO = global_ordinal_type;
5192 const char tfecfFuncName[] =
"computeCrsPaddingForImports";
5194 std::unique_ptr<std::string> prefix;
5196 prefix = this->
createPrefix(
"CrsGraph", tfecfFuncName);
5197 std::ostringstream os;
5198 os << *prefix <<
"importLIDs.extent(0): "
5199 << importLIDs.extent(0)
5200 <<
", imports.extent(0): "
5201 << imports.extent(0)
5202 <<
", numPacketsPerLID.extent(0): "
5203 << numPacketsPerLID.extent(0) << endl;
5204 std::cerr << os.str();
5207 const LO numImports =
static_cast<LO
>(importLIDs.extent(0));
// Rank of this process, or -1 when no communicator is available.
5208 const int myRank = [&] () {
5209 auto comm = rowMap_.is_null() ? Teuchos::null :
5211 return comm.is_null() ? -1 : comm->getRank();
5213 std::unique_ptr<padding_type> padding(
5214 new padding_type(myRank, numImports));
5216 if (imports.need_sync_host()) {
5217 imports.sync_host();
5219 auto imports_h = imports.view_host();
5220 if (numPacketsPerLID.need_sync_host ()) {
5221 numPacketsPerLID.sync_host();
5223 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5225 TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
5226 auto importLIDs_h = importLIDs.view_host();
5228 const map_type& tgtRowMap = *rowMap_;
// Incoming data is always treated as possibly containing duplicates.
5232 constexpr bool src_is_unique =
false;
5233 const bool tgt_is_unique = isMerged();
5235 std::vector<GO> tgtGblColIndsScratch;
5237 execute_sync_host_uvm_access();
5238 for (LO whichImport = 0; whichImport < numImports; ++whichImport) {
// Here one packet is one global column index.
5243 const LO origSrcNumEnt =
5244 static_cast<LO
>(numPacketsPerLID_h[whichImport]);
5245 GO*
const srcGblColInds = imports_h.data() + offset;
5247 const LO tgtLclRowInd = importLIDs_h[whichImport];
5248 const GO tgtGblRowInd =
5249 tgtRowMap.getGlobalElement(tgtLclRowInd);
5250 auto tgtGblColInds = getRowGraphGlobalRow(
5251 tgtGblColIndsScratch, *
this, tgtGblRowInd);
5252 const size_t origTgtNumEnt(tgtGblColInds.size());
5254 padding->update_import(whichImport, tgtLclRowInd,
5255 tgtGblColInds.getRawPtr(),
5256 origTgtNumEnt, tgt_is_unique,
5258 origSrcNumEnt, src_is_unique);
// Advance to the next row's slice of the buffer.
5259 offset += origSrcNumEnt;
5263 std::ostringstream os;
5264 os << *prefix <<
"Done" << endl;
5265 std::cerr << os.str();
// computePaddingForCrsMatrixUnpack: like computeCrsPaddingForImports, but
// for a byte buffer (`imports` holds char).  For each imported row,
// numPacketsPerLID gives the row's size in bytes; the row starts with a
// packed LO entry count followed by the packed global column indices, which
// are unpacked into a scratch vector and passed to padding->update_import.
5270 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5272 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type
5274 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5275 computePaddingForCrsMatrixUnpack(
5276 const Kokkos::DualView<
const local_ordinal_type*,
5277 buffer_device_type>& importLIDs,
5278 Kokkos::DualView<char*, buffer_device_type> imports,
5279 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
5280 const bool verbose)
const
5282 using Details::Impl::getRowGraphGlobalRow;
5283 using Details::PackTraits;
5286 using GO = global_ordinal_type;
5287 const char tfecfFuncName[] =
"computePaddingForCrsMatrixUnpack";
5289 std::unique_ptr<std::string> prefix;
5291 prefix = this->
createPrefix(
"CrsGraph", tfecfFuncName);
5292 std::ostringstream os;
5293 os << *prefix <<
"importLIDs.extent(0): "
5294 << importLIDs.extent(0)
5295 <<
", imports.extent(0): "
5296 << imports.extent(0)
5297 <<
", numPacketsPerLID.extent(0): "
5298 << numPacketsPerLID.extent(0) << endl;
5299 std::cerr << os.str();
5301 const bool extraVerbose =
5304 const LO numImports =
static_cast<LO
>(importLIDs.extent(0));
// There must be a byte count for every imported row.
5305 TEUCHOS_ASSERT( LO(numPacketsPerLID.extent(0)) >= numImports );
// Rank of this process, or -1 when no communicator is available.
5306 const int myRank = [&] () {
5307 auto comm = rowMap_.is_null() ? Teuchos::null :
5309 return comm.is_null() ? -1 : comm->getRank();
5311 std::unique_ptr<padding_type> padding(
5312 new padding_type(myRank, numImports));
5314 if (imports.need_sync_host()) {
5315 imports.sync_host();
5317 auto imports_h = imports.view_host();
5318 if (numPacketsPerLID.need_sync_host ()) {
5319 numPacketsPerLID.sync_host();
5321 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5323 TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
5324 auto importLIDs_h = importLIDs.view_host();
5326 const map_type& tgtRowMap = *rowMap_;
// Incoming data is always treated as possibly containing duplicates.
5330 constexpr bool src_is_unique =
false;
5331 const bool tgt_is_unique = isMerged();
5333 std::vector<GO> srcGblColIndsScratch;
5334 std::vector<GO> tgtGblColIndsScratch;
5336 execute_sync_host_uvm_access();
5337 for (LO whichImport = 0; whichImport < numImports; ++whichImport) {
5342 const size_t numBytes = numPacketsPerLID_h[whichImport];
5344 std::ostringstream os;
5345 os << *prefix <<
"whichImport=" << whichImport
5346 <<
", numImports=" << numImports
5347 <<
", numBytes=" << numBytes << endl;
5348 std::cerr << os.str();
// Rows with no bytes get special handling; the branch body is not
// visible here.
5350 if (numBytes == 0) {
// Read the packed entry count at the start of this row's bytes, after
// checking that the buffer is large enough to hold it.
5353 LO origSrcNumEnt = 0;
5354 const size_t numEntBeg = offset;
5355 const size_t numEntLen =
5356 PackTraits<LO>::packValueCount(origSrcNumEnt);
5357 TEUCHOS_ASSERT( numBytes >= numEntLen );
5358 TEUCHOS_ASSERT( imports_h.extent(0) >= numEntBeg + numEntLen );
5359 PackTraits<LO>::unpackValue(origSrcNumEnt,
5360 imports_h.data() + numEntBeg);
5362 std::ostringstream os;
5363 os << *prefix <<
"whichImport=" << whichImport
5364 <<
", numImports=" << numImports
5365 <<
", origSrcNumEnt=" << origSrcNumEnt << endl;
5366 std::cerr << os.str();
// Sanity checks: non-negative count, and the row's bytes cover the count
// field plus that many global indices.
5368 TEUCHOS_ASSERT( origSrcNumEnt >= LO(0) );
5369 TEUCHOS_ASSERT( numBytes >=
size_t(numEntLen + origSrcNumEnt *
sizeof(GO)) );
5370 const size_t gidsBeg = numEntBeg + numEntLen;
// The scratch vector only ever grows, so it is reused across rows.
5371 if (srcGblColIndsScratch.size() <
size_t(origSrcNumEnt)) {
5372 srcGblColIndsScratch.resize(origSrcNumEnt);
5374 GO*
const srcGblColInds = srcGblColIndsScratch.data();
5375 PackTraits<GO>::unpackArray(srcGblColInds,
5376 imports_h.data() + gidsBeg,
5378 const LO tgtLclRowInd = importLIDs_h[whichImport];
5379 const GO tgtGblRowInd =
5380 tgtRowMap.getGlobalElement(tgtLclRowInd);
5381 auto tgtGblColInds = getRowGraphGlobalRow(
5382 tgtGblColIndsScratch, *
this, tgtGblRowInd);
5383 const size_t origNumTgtEnt(tgtGblColInds.size());
5386 std::ostringstream os;
5387 os << *prefix <<
"whichImport=" << whichImport
5388 <<
", numImports=" << numImports
5389 <<
": Call padding->update_import" << endl;
5390 std::cerr << os.str();
5392 padding->update_import(whichImport, tgtLclRowInd,
5393 tgtGblColInds.getRawPtr(),
5394 origNumTgtEnt, tgt_is_unique,
5396 origSrcNumEnt, src_is_unique);
5401 std::ostringstream os;
5402 os << *prefix <<
"Done" << endl;
5403 std::cerr << os.str();
// packAndPrepare (name taken from tfecfFuncName below).
// Packs the rows listed in exportLIDs from `source` into `exports` and
// fills numPacketsPerLID.  Three paths are visible:
//   1. source is a RowGraph but not a CrsGraph: its pack() method writes
//      into a temporary Teuchos::Array that is copied to the host side of
//      `exports`;
//   2. this graph has a column Map and packed row pointers (or no local
//      rows): packCrsGraphNew;
//   3. otherwise: the source CrsGraph's packFillActiveNew.
// Reports std::invalid_argument if `source` is not a RowGraph, and
// std::runtime_error if this (target) graph is fill complete or a size
// check fails.
5408 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5410 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5412 (
const SrcDistObject& source,
5413 const Kokkos::DualView<
const local_ordinal_type*,
5414 buffer_device_type>& exportLIDs,
5415 Kokkos::DualView<packet_type*,
5416 buffer_device_type>& exports,
5417 Kokkos::DualView<
size_t*,
5418 buffer_device_type> numPacketsPerLID,
5419 size_t& constantNumPackets)
5422 using GO = global_ordinal_type;
5424 using crs_graph_type =
5425 CrsGraph<local_ordinal_type, global_ordinal_type, node_type>;
5426 const char tfecfFuncName[] =
"packAndPrepare: ";
5427 ProfilingRegion region_papn (
"Tpetra::CrsGraph::packAndPrepare");
5429 const bool verbose = verbose_;
5430 std::unique_ptr<std::string> prefix;
5432 prefix = this->
createPrefix(
"CrsGraph",
"packAndPrepare");
5433 std::ostringstream os;
5434 os << *prefix <<
"Start" << endl;
5435 std::cerr << os.str();
// One packet count is required per exported row.
5438 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5439 (exportLIDs.extent (0) != numPacketsPerLID.extent (0),
5441 "exportLIDs.extent(0) = " << exportLIDs.extent (0)
5442 <<
" != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent (0)
5444 const row_graph_type* srcRowGraphPtr =
5445 dynamic_cast<const row_graph_type*
> (&source);
5446 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5447 (srcRowGraphPtr ==
nullptr, std::invalid_argument,
"Source of an Export "
5448 "or Import operation to a CrsGraph must be a RowGraph with the same "
5449 "template parameters.");
5453 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5454 (this->isFillComplete (), std::runtime_error,
5455 "The target graph of an Import or Export must not be fill complete.");
// Null when the source is a RowGraph of some other concrete type.
5457 const crs_graph_type* srcCrsGraphPtr =
5458 dynamic_cast<const crs_graph_type*
> (&source);
// Path 1: generic RowGraph source, packed through its pack() method.
5460 if (srcCrsGraphPtr ==
nullptr) {
5461 using Teuchos::ArrayView;
5465 std::ostringstream os;
5466 os << *prefix <<
"Source is a RowGraph but not a CrsGraph"
5468 std::cerr << os.str();
// pack() takes Teuchos array views, so wrap the host data in ArrayViews.
5475 TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
5476 auto exportLIDs_h = exportLIDs.view_host ();
5477 ArrayView<const LO> exportLIDs_av (exportLIDs_h.data (),
5478 exportLIDs_h.extent (0));
5479 Teuchos::Array<GO> exports_a;
// numPacketsPerLID is written on host: reset its sync state and mark the
// host copy as modified.
5481 numPacketsPerLID.clear_sync_state ();
5482 numPacketsPerLID.modify_host ();
5483 auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
5484 ArrayView<size_t> numPacketsPerLID_av (numPacketsPerLID_h.data (),
5485 numPacketsPerLID_h.extent (0));
5486 srcRowGraphPtr->pack (exportLIDs_av, exports_a, numPacketsPerLID_av,
5487 constantNumPackets);
// Reallocate `exports` only when its length differs from the packed data.
5488 const size_t newSize =
static_cast<size_t> (exports_a.size ());
5489 if (
static_cast<size_t> (exports.extent (0)) != newSize) {
5490 using exports_dv_type = Kokkos::DualView<packet_type*, buffer_device_type>;
5491 exports = exports_dv_type (
"exports", newSize);
// Unmanaged host view over the temporary array, used as the copy source.
5493 Kokkos::View<
const packet_type*, Kokkos::HostSpace,
5494 Kokkos::MemoryUnmanaged> exports_a_h (exports_a.getRawPtr (), newSize);
5495 exports.clear_sync_state ();
5496 exports.modify_host ();
5498 Kokkos::deep_copy (exports.view_host (), exports_a_h);
// Path 2: packCrsGraphNew.
// NOTE(review): this condition inspects this (target) graph's column Map
// and row pointers, while the data packed comes from the source graph --
// confirm that this is intended.
5501 else if (! getColMap ().is_null () &&
5502 (rowPtrsPacked_dev_.extent (0) != 0 ||
5503 getRowMap ()->getLocalNumElements () == 0)) {
5505 std::ostringstream os;
5506 os << *prefix <<
"packCrsGraphNew path" << endl;
5507 std::cerr << os.str();
// An empty (default-constructed) PID list is passed to packCrsGraphNew.
5509 using export_pids_type =
5510 Kokkos::DualView<const int*, buffer_device_type>;
5511 export_pids_type exportPIDs;
5513 using NT = node_type;
5515 packCrsGraphNew<LO,GO,NT> (*srcCrsGraphPtr, exportLIDs, exportPIDs,
5516 exports, numPacketsPerLID,
5517 constantNumPackets,
false);
// Path 3: the source CrsGraph packs itself.
5520 srcCrsGraphPtr->packFillActiveNew (exportLIDs, exports, numPacketsPerLID,
5521 constantNumPackets);
5525 std::ostringstream os;
5526 os << *prefix <<
"Done" << endl;
5527 std::cerr << os.str();
// NOTE(review): this listing omits lines of the definition (the function
// name, most of the signature and both branch bodies are not shown).
// Presumably this is the Teuchos::Array-based pack entry point -- TODO
// confirm against the full file.
5531 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5535 Teuchos::Array<GlobalOrdinal>& exports,
// Fetch the column Map once; the test below is its only visible use.
5539 auto col_map = this->getColMap();
// Branch taken when a column Map exists AND either the packed row-offset
// array is non-empty or the row Map has zero local elements.
5541 if( !
col_map.is_null() && (rowPtrsPacked_dev_.extent(0) != 0 || getRowMap()->getLocalNumElements() ==0)) {
// Packs the column indices of the rows listed in exportLIDs into `exports`
// as global indices, in two host-parallel passes over the export list:
//   1. a parallel_reduce that stores each row's entry count in
//      numPacketsPerLID and sums the total, and
//   2. a parallel_scan whose running value is the write offset into
//      `exports` for each row.
//
// Outputs visible below: constantNumPackets is set to 0, numPacketsPerLID
// receives the per-row counts, and `exports` is resized to the total.
//
// Throws (via TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC):
//   std::runtime_error  if numExportLIDs != numPacketsPerLID.size();
//   std::logic_error    if the graph is locally indexed without a column
//                       Map, if either pass counted an error, or if
//                       supportsRowViews() is false.
//
// NOTE(review): this listing omits lines (function name, parts of the
// signature, braces, `if (verbose)` guards, the definition of
// numExportLIDs). Only visible statements are described here.
5552 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5556 Teuchos::Array<GlobalOrdinal>& exports,
5563 using host_execution_space =
5564 typename Kokkos::View<size_t*, device_type>::
5565 HostMirror::execution_space;
5567 const bool verbose = verbose_;
5570 std::unique_ptr<std::string>
prefix;
// Prefix for verbose output, tagged with this routine's name so that the
// debug lines printed below are attributed to the right function.
5572 prefix = this->createPrefix(
"CrsGraph",
"packFillActive");
5573 std::ostringstream
os;
5575 std::cerr <<
os.str();
// The two per-row arrays must have one slot per exported row.
5577 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5578 (numExportLIDs != numPacketsPerLID.size (), std::runtime_error,
5579 "exportLIDs.size() = " << numExportLIDs <<
" != numPacketsPerLID.size()"
5580 " = " << numPacketsPerLID.size () <<
".");
5582 const map_type&
rowMap = * (this->getRowMap ());
5583 const map_type*
const colMapPtr = this->colMap_.getRawPtr ();
// A locally indexed graph needs the column Map to translate indices below.
5584 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5585 (this->isLocallyIndexed () && colMapPtr ==
nullptr, std::logic_error,
5586 "This graph claims to be locally indexed, but its column Map is nullptr. "
5587 "This should never happen. Please report this bug to the Tpetra "
// Rows may carry different numbers of entries, so no constant packet count
// is reported (other code in this file branches on constantNumPackets == 0).
5591 constantNumPackets = 0;
// Raw pointers are what the parallel lambdas below read and write, rather
// than the Teuchos array wrappers themselves.
5595 size_t*
const numPacketsPerLID_raw = numPacketsPerLID.getRawPtr ();
5596 const LO*
const exportLIDs_raw = exportLIDs.getRawPtr ();
5603 Kokkos::RangePolicy<host_execution_space, LO> inputRange (0, numExportLIDs);
5604 size_t totalNumPackets = 0;
5605 size_t errCount = 0;
// errCount is wrapped in a rank-0 host View so that the by-value lambdas
// can still increment it (atomically) through errCountView().
5608 typedef Kokkos::Device<host_execution_space, Kokkos::HostSpace>
5610 Kokkos::View<size_t, host_device_type> errCountView (&errCount);
5611 constexpr size_t ONE = 1;
5613 execute_sync_host_uvm_access();
// Pass 1: per-row entry counts and their sum. A local row index that the
// row Map cannot convert to a global index is counted as an error and
// assigned zero packets.
5614 Kokkos::parallel_reduce (
"Tpetra::CrsGraph::pack: totalNumPackets",
5616 [=] (
const LO& i,
size_t& curTotalNumPackets) {
5617 const GO gblRow =
rowMap.getGlobalElement (exportLIDs_raw[i]);
5618 if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
5619 Kokkos::atomic_add (&errCountView(), ONE);
5620 numPacketsPerLID_raw[i] = 0;
5623 const size_t numEnt = this->getNumEntriesInGlobalRow (gblRow);
5624 numPacketsPerLID_raw[i] = numEnt;
5625 curTotalNumPackets += numEnt;
5631 std::ostringstream os;
5632 os << *prefix <<
"totalNumPackets=" << totalNumPackets << endl;
5633 std::cerr << os.str();
5635 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5636 (errCount != 0, std::logic_error,
"totalNumPackets count encountered "
5637 "one or more errors! errCount = " << errCount
5638 <<
", totalNumPackets = " << totalNumPackets <<
".");
// Make room for every column index that pass 2 will write.
5642 exports.resize (totalNumPackets);
5644 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5645 (! this->supportsRowViews (), std::logic_error,
5646 "this->supportsRowViews() returns false; this should never happen. "
5647 "Please report this bug to the Tpetra developers.");
5653 std::ostringstream os;
5654 os << *prefix <<
"Pack into exports" << endl;
5655 std::cerr << os.str();
5660 GO*
const exports_raw = exports.getRawPtr ();
// Pass 2: exportsOffset (the scan value) is the position in exports where
// row i starts; each packed row advances it by its entry count. An invalid
// RowInfo or a row that would run past totalNumPackets is counted as an
// error. Locally indexed rows are converted to global column indices
// through the column Map; globally indexed rows are copied unchanged.
// NOTE(review): the `final` flag is not referenced on any visible line;
// presumably the writes are guarded by it on omitted lines -- confirm.
5662 Kokkos::parallel_scan (
"Tpetra::CrsGraph::pack: pack from views",
5663 inputRange, [=, &prefix]
5664 (
const LO i,
size_t& exportsOffset,
const bool final) {
5665 const size_t curOffset = exportsOffset;
5666 const GO gblRow =
rowMap.getGlobalElement (exportLIDs_raw[i]);
5667 const RowInfo rowInfo =
5668 this->getRowInfoFromGlobalRowIndex (gblRow);
5670 using TDO = Tpetra::Details::OrdinalTraits<size_t>;
5671 if (rowInfo.localRow == TDO::invalid ()) {
5673 std::ostringstream os;
5674 os << *prefix <<
": INVALID rowInfo: i=" << i
5675 <<
", lclRow=" << exportLIDs_raw[i] << endl;
5676 std::cerr << os.str();
5678 Kokkos::atomic_add (&errCountView(), ONE);
5680 else if (curOffset + rowInfo.numEntries > totalNumPackets) {
5682 std::ostringstream os;
5683 os << *prefix <<
": UH OH! For i=" << i <<
", lclRow="
5684 << exportLIDs_raw[i] <<
", gblRow=" << gblRow <<
", curOffset "
5685 "(= " << curOffset <<
") + numEnt (= " << rowInfo.numEntries
5686 <<
") > totalNumPackets (= " << totalNumPackets <<
")."
5688 std::cerr << os.str();
5690 Kokkos::atomic_add (&errCountView(), ONE);
5693 const LO numEnt =
static_cast<LO
> (rowInfo.numEntries);
5694 if (this->isLocallyIndexed ()) {
5695 auto lclColInds = getLocalIndsViewHost (rowInfo);
5697 for (LO k = 0; k < numEnt; ++k) {
5698 const LO lclColInd = lclColInds(k);
5699 const GO gblColInd = colMapPtr->getGlobalElement (lclColInd);
5703 exports_raw[curOffset + k] = gblColInd;
5706 exportsOffset = curOffset + numEnt;
5708 else if (this->isGloballyIndexed ()) {
5709 auto gblColInds = getGlobalIndsViewHost (rowInfo);
5711 for (LO k = 0; k < numEnt; ++k) {
5712 const GO gblColInd = gblColInds(k);
5716 exports_raw[curOffset + k] = gblColInd;
5719 exportsOffset = curOffset + numEnt;
// Any error counted during pass 2 is reported as a logic error.
5727 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5728 (errCount != 0, std::logic_error,
"Packing encountered "
5729 "one or more errors! errCount = " << errCount
5730 <<
", totalNumPackets = " << totalNumPackets <<
".");
5733 std::ostringstream os;
5734 os << *prefix <<
"Done" << endl;
5735 std::cerr << os.str();
// DualView-based variant of the row-packing routine: writes the column
// indices of the rows listed in exportLIDs, as global indices, into the
// host side of `exports`.
//
// Parameters (from the visible signature):
//   exportLIDs         local row indices to pack; asserted below not to
//                      need a host sync.
//   exports            output buffer; replaced by a new DualView only when
//                      its extent is smaller than the total entry count.
//   numPacketsPerLID   receives, on its host view, the entry count of each
//                      exported row (passed by value).
//   constantNumPackets set to 0.
//
// Throws (via TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC):
//   std::runtime_error  if exportLIDs and numPacketsPerLID extents differ;
//   std::logic_error    if the graph is locally indexed without a column
//                       Map, if the counting pass recorded an error, or if
//                       supportsRowViews() is false.
//
// NOTE(review): this listing omits lines (return type, braces,
// `if (verbose)` guards, early returns). Only visible statements are
// described here.
5739 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5741 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5742 packFillActiveNew (
const Kokkos::DualView<
const local_ordinal_type*,
5743 buffer_device_type>& exportLIDs,
5744 Kokkos::DualView<packet_type*,
5745 buffer_device_type>& exports,
5746 Kokkos::DualView<
size_t*,
5747 buffer_device_type> numPacketsPerLID,
5748 size_t& constantNumPackets)
const
5752 using GO = global_ordinal_type;
5753 using host_execution_space =
typename Kokkos::View<
size_t*,
5754 device_type>::HostMirror::execution_space;
5755 using host_device_type =
5756 Kokkos::Device<host_execution_space, Kokkos::HostSpace>;
5757 using exports_dv_type =
5758 Kokkos::DualView<packet_type*, buffer_device_type>;
5759 const char tfecfFuncName[] =
"packFillActiveNew: ";
5760 const bool verbose = verbose_;
5762 const auto numExportLIDs = exportLIDs.extent (0);
5763 std::unique_ptr<std::string> prefix;
5765 prefix = this->
createPrefix(
"CrsGraph",
"packFillActiveNew");
5766 std::ostringstream os;
5767 os << *prefix <<
"numExportLIDs: " << numExportLIDs
5768 <<
", numPacketsPerLID.extent(0): "
5769 << numPacketsPerLID.extent(0) << endl;
5770 std::cerr << os.str();
// The two per-row arrays must have one slot per exported row.
5772 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5773 (numExportLIDs != numPacketsPerLID.extent (0), std::runtime_error,
5774 "exportLIDs.extent(0) = " << numExportLIDs
5775 <<
" != numPacketsPerLID.extent(0) = "
5776 << numPacketsPerLID.extent (0) <<
".");
// The export list is read on host below, so it must already be current there.
5777 TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
5778 auto exportLIDs_h = exportLIDs.view_host ();
5780 const map_type&
rowMap = * (this->getRowMap ());
5781 const map_type*
const colMapPtr = this->colMap_.getRawPtr ();
// A locally indexed graph needs the column Map to translate indices below.
5782 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5783 (this->isLocallyIndexed () && colMapPtr ==
nullptr, std::logic_error,
5784 "This graph claims to be locally indexed, but its column Map is nullptr. "
5785 "This should never happen. Please report this bug to the Tpetra "
// Rows may carry different numbers of entries, so no constant packet count
// is reported (other code in this file branches on constantNumPackets == 0).
5789 constantNumPackets = 0;
// The counts are about to be overwritten on host: reset the DualView's sync
// flags and flag the host side as modified before taking the host view.
5791 numPacketsPerLID.clear_sync_state ();
5792 numPacketsPerLID.modify_host ();
5793 auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
5800 using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
5801 range_type inputRange (0, numExportLIDs);
5802 size_t totalNumPackets = 0;
5803 size_t errCount = 0;
// errCount is wrapped in a rank-0 host View so that the by-value lambdas
// can still increment it (atomically) through errCountView().
5806 Kokkos::View<size_t, host_device_type> errCountView (&errCount);
5807 constexpr size_t ONE = 1;
5810 std::ostringstream os;
5811 os << *prefix <<
"Compute totalNumPackets" << endl;
5812 std::cerr << os.str ();
5815 execute_sync_host_uvm_access();
// Pass 1: per-row entry counts and their sum. A local row index that the
// row Map cannot convert to a global index is counted as an error and
// assigned zero packets.
5816 Kokkos::parallel_reduce
5817 (
"Tpetra::CrsGraph::pack: totalNumPackets",
5819 [=, &prefix] (
const LO i,
size_t& curTotalNumPackets) {
5820 const LO lclRow = exportLIDs_h[i];
5821 const GO gblRow =
rowMap.getGlobalElement (lclRow);
5822 if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
5824 std::ostringstream os;
5825 os << *prefix <<
"For i=" << i <<
", lclRow=" << lclRow
5826 <<
" not in row Map on this process" << endl;
5827 std::cerr << os.str();
5829 Kokkos::atomic_add (&errCountView(), ONE);
5830 numPacketsPerLID_h(i) = 0;
5833 const size_t numEnt = this->getNumEntriesInGlobalRow (gblRow);
5834 numPacketsPerLID_h(i) = numEnt;
5835 curTotalNumPackets += numEnt;
5841 std::ostringstream os;
5842 os << *prefix <<
"totalNumPackets: " << totalNumPackets
5843 <<
", errCount: " << errCount << endl;
5844 std::cerr << os.str ();
5846 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5847 (errCount != 0, std::logic_error,
"totalNumPackets count encountered "
5848 "one or more errors! totalNumPackets: " << totalNumPackets
5849 <<
", errCount: " << errCount <<
".");
// Grow-only: an export buffer that is already large enough is reused as is.
5852 if (
size_t(exports.extent (0)) < totalNumPackets) {
5854 exports = exports_dv_type (
"exports", totalNumPackets);
5857 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5858 (! this->supportsRowViews (), std::logic_error,
5859 "this->supportsRowViews() returns false; this should never happen. "
5860 "Please report this bug to the Tpetra developers.");
5866 std::ostringstream os;
5867 os << *prefix <<
"Pack into exports buffer" << endl;
5868 std::cerr << os.str();
// The packed indices are written through the host view of `exports`.
5871 exports.clear_sync_state ();
5872 exports.modify_host ();
5873 auto exports_h = exports.view_host ();
// Pass 2: exportsOffset (the scan value) is the position in exports_h where
// row i starts; each packed row advances it by its entry count. Three
// conditions are counted as errors: a row not in the row Map, an invalid
// RowInfo, and a row that would run past totalNumPackets. Locally indexed
// rows are converted to global column indices through the column Map;
// globally indexed rows are copied unchanged.
// NOTE(review): the `final` flag is not referenced on any visible line;
// presumably the writes are guarded by it on omitted lines -- confirm.
5876 Kokkos::parallel_scan
5877 (
"Tpetra::CrsGraph::packFillActiveNew: Pack exports",
5878 inputRange, [=, &prefix]
5879 (
const LO i,
size_t& exportsOffset,
const bool final) {
5880 const size_t curOffset = exportsOffset;
5881 const LO lclRow = exportLIDs_h(i);
5882 const GO gblRow =
rowMap.getGlobalElement (lclRow);
5883 if (gblRow == Details::OrdinalTraits<GO>::invalid ()) {
5885 std::ostringstream os;
5886 os << *prefix <<
"For i=" << i <<
", lclRow=" << lclRow
5887 <<
" not in row Map on this process" << endl;
5888 std::cerr << os.str();
5890 Kokkos::atomic_add (&errCountView(), ONE);
5894 const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (gblRow);
5895 if (rowInfo.localRow == Details::OrdinalTraits<size_t>::invalid ()) {
5897 std::ostringstream os;
5898 os << *prefix <<
"For i=" << i <<
", lclRow=" << lclRow
5899 <<
", gblRow=" << gblRow <<
": invalid rowInfo"
5901 std::cerr << os.str();
5903 Kokkos::atomic_add (&errCountView(), ONE);
5907 if (curOffset + rowInfo.numEntries > totalNumPackets) {
5909 std::ostringstream os;
5910 os << *prefix <<
"For i=" << i <<
", lclRow=" << lclRow
5911 <<
", gblRow=" << gblRow <<
", curOffset (= "
5912 << curOffset <<
") + numEnt (= " << rowInfo.numEntries
5913 <<
") > totalNumPackets (= " << totalNumPackets
5915 std::cerr << os.str();
5917 Kokkos::atomic_add (&errCountView(), ONE);
5921 const LO numEnt =
static_cast<LO
> (rowInfo.numEntries);
5922 if (this->isLocallyIndexed ()) {
5923 auto lclColInds = getLocalIndsViewHost(rowInfo);
5925 for (LO k = 0; k < numEnt; ++k) {
5926 const LO lclColInd = lclColInds(k);
5927 const GO gblColInd = colMapPtr->getGlobalElement (lclColInd);
5931 exports_h(curOffset + k) = gblColInd;
5934 exportsOffset = curOffset + numEnt;
5936 else if (this->isGloballyIndexed ()) {
5937 auto gblColInds = getGlobalIndsViewHost(rowInfo);
5939 for (LO k = 0; k < numEnt; ++k) {
5940 const GO gblColInd = gblColInds(k);
5944 exports_h(curOffset + k) = gblColInd;
5947 exportsOffset = curOffset + numEnt;
// NOTE(review): after the scan, the only visible use of errCount is this
// verbose print; no exception check is shown. Confirm whether errors from
// pass 2 are meant to be reported to the caller.
5960 std::ostringstream os;
5961 os << *prefix <<
"errCount=" << errCount <<
"; Done" << endl;
5962 std::cerr << os.str();
// Unpacks received column indices from `imports` and inserts them into the
// local rows listed in importLIDs.
//
// Visible steps: compute and apply row padding for the incoming entries,
// validate the inputs, make the host sides of numPacketsPerLID and imports
// current, then walk the import rows in order, consuming
// numPacketsPerLID_h[i] global column indices per row from a running offset
// into imports_h.
//
// Throws std::runtime_error if importLIDs and numPacketsPerLID extents
// differ or if this graph is fill complete; std::logic_error if an import
// row is not in the row Map. The catch clause near the end rethrows any
// std::exception as std::runtime_error.
//
// NOTE(review): this listing omits lines (function name, last parameter(s),
// braces, `if (verbose)` guards, the `try`). Only visible statements are
// described here.
5966 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5968 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5970 (
const Kokkos::DualView<
const local_ordinal_type*,
5971 buffer_device_type>& importLIDs,
5972 Kokkos::DualView<packet_type*,
5973 buffer_device_type> imports,
5974 Kokkos::DualView<
size_t*,
5975 buffer_device_type> numPacketsPerLID,
5979 using Details::ProfilingRegion;
5982 using GO = global_ordinal_type;
5983 const char tfecfFuncName[] =
"unpackAndCombine";
5985 ProfilingRegion regionCGC(
"Tpetra::CrsGraph::unpackAndCombine");
5986 const bool verbose = verbose_;
5988 std::unique_ptr<std::string> prefix;
5990 prefix = this->
createPrefix(
"CrsGraph", tfecfFuncName);
5991 std::ostringstream os;
5992 os << *prefix <<
"Start" << endl;
5993 std::cerr << os.str ();
// Padding is computed from the incoming data and applied before any index
// is inserted.
5996 auto padding = computeCrsPaddingForImports(
5997 importLIDs, imports, numPacketsPerLID, verbose);
5998 applyCrsPadding(*padding, verbose);
6000 std::ostringstream os;
6001 os << *prefix <<
"Done computing & applying padding" << endl;
6002 std::cerr << os.str ();
// tfecfFuncName has no trailing ": ", so these messages supply their own.
6023 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6024 (importLIDs.extent (0) != numPacketsPerLID.extent (0),
6025 std::runtime_error,
": importLIDs.extent(0) = "
6026 << importLIDs.extent (0) <<
" != numPacketsPerLID.extent(0) = "
6027 << numPacketsPerLID.extent (0) <<
".");
6028 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6029 (isFillComplete (), std::runtime_error,
6030 ": Import or Export operations are not allowed on a target "
6031 "CrsGraph that is fillComplete.");
// Everything below reads on host: sync the two by-value DualViews if they
// need it, and require that importLIDs is already current on host.
6033 const size_t numImportLIDs(importLIDs.extent(0));
6034 if (numPacketsPerLID.need_sync_host()) {
6035 numPacketsPerLID.sync_host();
6037 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
6038 if (imports.need_sync_host()) {
6039 imports.sync_host();
6041 auto imports_h = imports.view_host();
6042 TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
6043 auto importLIDs_h = importLIDs.view_host();
// Scratch for converted local column indices. When the graph is locally
// indexed it is sized once to the largest per-row packet count so the
// insert loop can reuse it for every row.
6046 Teuchos::Array<LO> lclColInds;
6047 if (isLocallyIndexed()) {
6049 std::ostringstream os;
6050 os << *prefix <<
"Preallocate local indices scratch" << endl;
6051 std::cerr << os.str();
6053 size_t maxNumInserts = 0;
6054 for (
size_t i = 0; i < numImportLIDs; ++i) {
6055 maxNumInserts = std::max (maxNumInserts, numPacketsPerLID_h[i]);
6058 std::ostringstream os;
6059 os << *prefix <<
"Local indices scratch size: "
6060 << maxNumInserts << endl;
6061 std::cerr << os.str();
6063 lclColInds.resize (maxNumInserts);
6067 std::ostringstream os;
6069 if (isGloballyIndexed()) {
6070 os <<
"Graph is globally indexed";
6073 os <<
"Graph is neither locally nor globally indexed";
6076 std::cerr << os.str();
6080 TEUCHOS_ASSERT( ! rowMap_.is_null() );
6081 const map_type&
rowMap = *rowMap_;
// Insert loop: importsOffset is the running position in imports_h of the
// current row's first column index.
6084 size_t importsOffset = 0;
6085 for (
size_t i = 0; i < numImportLIDs; ++i) {
6087 std::ostringstream os;
6088 os << *prefix <<
"i=" << i <<
", numImportLIDs="
6089 << numImportLIDs << endl;
6090 std::cerr << os.str();
6094 const LO lclRow = importLIDs_h[i];
6095 const GO gblRow =
rowMap.getGlobalElement(lclRow);
6096 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6097 (gblRow == Teuchos::OrdinalTraits<GO>::invalid(),
6098 std::logic_error,
"importLIDs[i=" << i <<
"]="
6099 << lclRow <<
" is not in the row Map on the calling "
// NOTE(review): the size_t packet count is narrowed to LO here -- confirm
// that per-row counts always fit in LO.
6101 const LO numEnt = numPacketsPerLID_h[i];
// A row with no entries gets a null pointer instead of an address in imports_h.
6102 const GO*
const gblColInds = (numEnt == 0) ?
nullptr :
6103 imports_h.data() + importsOffset;
// Not locally indexed: insert the global indices through the filtered
// insert. Otherwise convert each global index to a local one with the
// column Map and insert the local indices.
6104 if (! isLocallyIndexed()) {
6105 insertGlobalIndicesFiltered(lclRow, gblColInds, numEnt);
6110 for (LO j = 0; j < numEnt; j++) {
6111 lclColInds[j] = colMap_->getLocalElement(gblColInds[j]);
6113 insertLocalIndices(lclRow, numEnt, lclColInds.data());
6115 importsOffset += numEnt;
// Any std::exception caught here is rethrown as std::runtime_error with the
// original message appended.
6118 catch (std::exception& e) {
6119 TEUCHOS_TEST_FOR_EXCEPTION
6120 (
true, std::runtime_error,
6121 "Tpetra::CrsGraph::unpackAndCombine: Insert loop threw an "
6122 "exception: " << endl << e.what());
6126 std::ostringstream os;
6127 os << *prefix <<
"Done" << endl;
6128 std::cerr << os.str();
// NOTE(review): only fragments of this definition are present in the
// listing (no signature, no branch bodies). Presumably this is
// removeEmptyProcessesInPlace, which is called elsewhere in this file --
// TODO confirm against the full source.
// What the visible lines show: the domain, range and column Maps are each
// handled only when non-null; the domain and range Maps are compared with
// the row Map by raw pointer; and `params` is checked for "Export" and
// "Import" sublists.
6132 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6137 using Teuchos::Comm;
6138 using Teuchos::null;
6139 using Teuchos::ParameterList;
// Pointer identity with the row Map is tested for the domain Map ...
6153 if (! domainMap_.is_null ()) {
6154 if (domainMap_.getRawPtr () == rowMap_.getRawPtr ()) {
// ... and likewise for the range Map.
6164 if (! rangeMap_.is_null ()) {
6165 if (rangeMap_.getRawPtr () == rowMap_.getRawPtr ()) {
6175 if (! colMap_.is_null ()) {
// True when no parameter list was given or it has no "Export" sublist.
6189 if (! rangeMap_.is_null () &&
6192 if (
params.is_null () || !
params->isSublist (
"Export")) {
// Same test for the "Import" sublist.
6201 if (! domainMap_.is_null () &&
6204 if (
params.is_null () || !
params->isSublist (
"Import")) {
// getLocalDiagOffsets, overload taking an unmanaged device View of size_t
// as the `offsets` output.
// Visible preconditions: the graph must have a column Map
// (std::runtime_error) and offsets.extent(0) must be at least
// getLocalNumRows() (std::invalid_argument).
// NOTE(review): most of the body is missing from this listing (return type,
// macro names, the per-row search and the conditions around the diagnostic
// text below). Comments describe only what is visible.
6230 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6233 getLocalDiagOffsets (
const Kokkos::View<size_t*, device_type, Kokkos::MemoryUnmanaged>& offsets)
const
6239 const bool verbose = verbose_;
6241 std::unique_ptr<std::string>
prefix;
6243 prefix = this->createPrefix(
"CrsGraph",
"getLocalDiagOffsets");
6244 std::ostringstream
os;
6245 os << *
prefix <<
"offsets.extent(0)=" << offsets.extent(0)
6247 std::cerr <<
os.str();
6251 (! hasColMap (), std::runtime_error,
"The graph must have a column Map.");
6252 const LO
lclNumRows =
static_cast<LO
> (this->getLocalNumRows ());
// The output must have room for one entry per local row; both sides of the
// comparison are expressed as LO.
6254 (
static_cast<LO
> (offsets.extent (0)) <
lclNumRows,
6255 std::invalid_argument,
"offsets.extent(0) = " <<
6256 offsets.extent (0) <<
" < getLocalNumRows() = " <<
lclNumRows <<
".");
// Two paths are visible: one for a fill-complete graph that takes the local
// graph on device, and one that works through a host mirror of `offsets`.
6281 const bool sorted = this->isSorted ();
6282 if (isFillComplete ()) {
6283 auto lclGraph = this->getLocalGraphDevice ();
6292 auto offsets_h = Kokkos::create_mirror_view (offsets);
6302 if (
lclColInd == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
6312 const size_t hint = 0;
6323 typename local_inds_dualv_type::t_host::const_type
lclColInds;
// Diagnostic output: a "Wrong offsets" list is printed to std::cerr.
6360 std::ostringstream
os;
6361 os << *
prefix <<
"Wrong offsets: [";
6370 std::cerr <<
os.str();
6374 using Teuchos::reduceAll;
6376 Teuchos::RCP<const Teuchos::Comm<int> > comm = this->getComm ();
// Assembles a report naming a process (gblResults[4]) and up to four kinds
// of problem; the conditions that select each line are not visible here.
6400 std::ostringstream
os;
6401 os <<
"Issue(s) that we noticed (on Process " <<
gblResults[4] <<
", "
6402 "possibly among others): " <<
endl;
6404 os <<
" - The column Map does not contain at least one diagonal entry "
6405 "of the graph." <<
endl;
6408 os <<
" - On one or more processes, some row does not contain a "
6409 "diagonal entry." <<
endl;
6412 os <<
" - On one or more processes, some offsets are incorrect."
6416 os <<
" - One or more processes had some other error."
// getLocalOffRankOffsets (name taken from the createPrefix label below; the
// signature is not visible): hands back, through `offsets`, the member View
// k_offRankOffsets_, which has getLocalNumRows()+1 entries.
// Requires a column Map (std::runtime_error otherwise).
// NOTE(review): braces, `else`/`return` lines and the remaining arguments
// of getGraphOffRankOffsets are missing from this listing, so the exact
// branch structure should be confirmed against the full source.
6424 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6431 const bool verbose = verbose_;
6433 std::unique_ptr<std::string>
prefix;
6435 prefix = this->createPrefix(
"CrsGraph",
"getLocalOffRankOffsets");
6436 std::ostringstream
os;
6437 os << *
prefix <<
"offsets.extent(0)=" << offsets.extent(0)
6439 std::cerr <<
os.str();
6443 (! hasColMap (), std::runtime_error,
"The graph must have a column Map.");
6446 const size_t lclNumRows = this->getLocalNumRows ();
// Cached result: reused only when the flag is set and the stored View has
// the expected length of lclNumRows+1.
6448 if (haveLocalOffRankOffsets_ && k_offRankOffsets_.extent(0) ==
lclNumRows+1) {
6449 offsets = k_offRankOffsets_;
// Otherwise the flag is cleared and a new View is allocated without
// initialising its contents.
6452 haveLocalOffRankOffsets_ =
false;
6453 k_offRankOffsets_ = offset_device_view_type(Kokkos::ViewAllocateWithoutInitializing(
"offRankOffset"),
lclNumRows+1);
6454 offsets = k_offRankOffsets_;
// For a fill-complete graph the offsets are computed from the local device
// graph, and the flag is set afterwards.
6469 if (isFillComplete ()) {
6470 auto lclGraph = this->getLocalGraphDevice ();
6471 ::Tpetra::Details::getGraphOffRankOffsets (k_offRankOffsets_,
6474 haveLocalOffRankOffsets_ =
true;
// Helper trait, specialised on a bool that is computed from whether
// DeviceType's memory space is Kokkos::HostSpace. It provides the View type
// to use for "device" offsets together with two static functions,
// getDeviceOffsets and copyBackIfNeeded.
// NOTE(review): return types, braces and function bodies are only partly
// present in this listing.
6496 template<
class DeviceType,
6498 std::is_same<
typename DeviceType::memory_space,
6499 Kokkos::HostSpace>::value>
6500 struct HelpGetLocalDiagOffsets {};
// Specialisation for a HostSpace device: device_offsets_type and
// host_offsets_type are the same unmanaged HostSpace View type.
6502 template<
class DeviceType>
6503 struct HelpGetLocalDiagOffsets<DeviceType, true> {
6504 typedef DeviceType device_type;
6505 typedef Kokkos::View<
size_t*, Kokkos::HostSpace,
6506 Kokkos::MemoryUnmanaged> device_offsets_type;
6507 typedef Kokkos::View<
size_t*, Kokkos::HostSpace,
6508 Kokkos::MemoryUnmanaged> host_offsets_type;
6510 static device_offsets_type
6511 getDeviceOffsets (
const host_offsets_type& hostOffsets)
// Both parameters are unnamed here, so this overload does not read them
// (presumably a no-op -- body not visible).
6519 copyBackIfNeeded (
const host_offsets_type& ,
6520 const device_offsets_type& )
// Specialisation for a non-HostSpace device: device_offsets_type is a View
// on device_type that is separate from the host View.
6524 template<
class DeviceType>
6525 struct HelpGetLocalDiagOffsets<DeviceType, false> {
6526 typedef DeviceType device_type;
6530 typedef Kokkos::View<size_t*, device_type> device_offsets_type;
6531 typedef Kokkos::View<
size_t*, Kokkos::HostSpace,
6532 Kokkos::MemoryUnmanaged> host_offsets_type;
6534 static device_offsets_type
6535 getDeviceOffsets (
const host_offsets_type& hostOffsets)
// Returns a new device View labelled "offsets" with the same extent as the
// host View.
6539 return device_offsets_type (
"offsets", hostOffsets.extent (0));
6543 copyBackIfNeeded (
const host_offsets_type& hostOffsets,
6544 const device_offsets_type& deviceOffsets)
// Copies the device results into the caller's host View.
6547 Kokkos::deep_copy (hostOffsets, deviceOffsets);
// NOTE(review): the signature and most of the body are missing from this
// listing. Presumably this is the second getLocalDiagOffsets overload, the
// one that goes through helper_type -- TODO confirm against the full source.
// Visible behaviour: requires a column Map (std::runtime_error otherwise)
// and compares offsets.size() with the local row count; the action taken on
// a mismatch is not shown.
6553 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6561 (! this->hasColMap (), std::runtime_error,
6562 "The graph does not yet have a column Map.");
6563 const LO
myNumRows =
static_cast<LO
> (this->getLocalNumRows ());
// Both sides of the size comparison are expressed as LO.
6564 if (
static_cast<LO
> (offsets.size ()) !=
myNumRows) {
6581 typedef typename helper_type::host_offsets_type host_offsets_type;
6591 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6598 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6602 const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>&
rowTransfer,
6603 const Teuchos::RCP<const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node> > &
domainTransfer,
6604 const Teuchos::RCP<const map_type>& domainMap,
6605 const Teuchos::RCP<const map_type>&
rangeMap,
6606 const Teuchos::RCP<Teuchos::ParameterList>&
params)
const
6612 using Teuchos::ArrayRCP;
6613 using Teuchos::ArrayView;
6614 using Teuchos::Comm;
6615 using Teuchos::ParameterList;
6618#ifdef HAVE_TPETRA_MMM_TIMINGS
6620 using Teuchos::TimeMonitor;
6625 using NT = node_type;
6629 const char*
prefix =
"Tpetra::CrsGraph::transferAndFillComplete: ";
6631#ifdef HAVE_TPETRA_MMM_TIMINGS
6633 if(!
params.is_null()) label =
params->get(
"Timer Label", label);
6634 string prefix2 =
string(
"Tpetra ")+ label + std::string(
": CrsGraph TAFC ");
6648 prefix <<
"The 'rowTransfer' input argument must be either an Import or "
6649 "an Export, and its template parameters must match the corresponding "
6650 "template parameters of the CrsGraph.");
6666 prefix <<
"The 'domainTransfer' input argument must be either an "
6667 "Import or an Export, and its template parameters must match the "
6668 "corresponding template parameters of the CrsGraph.");
6674 prefix <<
"The 'rowTransfer' and 'domainTransfer' input arguments "
6675 "must be of the same type (either Import or Export).");
6681 prefix <<
"The 'rowTransfer' and 'domainTransfer' input arguments "
6682 "must be of the same type (either Import or Export).");
6689 const bool communication_needed = rowTransfer.getSourceMap()->isDistributed();
6695 bool reverseMode =
false;
6696 bool restrictComm =
false;
6697 RCP<ParameterList> graphparams;
6698 if (! params.is_null()) {
6699 reverseMode = params->get(
"Reverse Mode", reverseMode);
6700 restrictComm = params->get(
"Restrict Communicator", restrictComm);
6701 graphparams = sublist(params,
"CrsGraph");
6706 RCP<const map_type> MyRowMap = reverseMode ?
6707 rowTransfer.getSourceMap() : rowTransfer.getTargetMap();
6708 RCP<const map_type> MyColMap;
6709 RCP<const map_type> MyDomainMap = !
domainMap.is_null() ?
domainMap : getDomainMap();
6710 RCP<const map_type> MyRangeMap = ! rangeMap.is_null() ? rangeMap : getRangeMap();
6711 RCP<const map_type> BaseRowMap = MyRowMap;
6712 RCP<const map_type> BaseDomainMap = MyDomainMap;
6720 if (! destGraph.is_null()) {
6731 const bool NewFlag =
6732 ! destGraph->isLocallyIndexed() && ! destGraph->isGloballyIndexed();
6733 TEUCHOS_TEST_FOR_EXCEPTION(! NewFlag, std::invalid_argument,
6734 prefix <<
"The input argument 'destGraph' is only allowed to be nonnull, "
6735 "if its graph is empty (neither locally nor globally indexed).");
6744 TEUCHOS_TEST_FOR_EXCEPTION(
6745 ! destGraph->getRowMap()->isSameAs(*MyRowMap), std::invalid_argument,
6746 prefix <<
"The (row) Map of the input argument 'destGraph' is not the "
6747 "same as the (row) Map specified by the input argument 'rowTransfer'.");
6749 TEUCHOS_TEST_FOR_EXCEPTION(
6750 ! destGraph->checkSizes(*
this), std::invalid_argument,
6751 prefix <<
"You provided a nonnull destination graph, but checkSizes() "
6752 "indicates that it is not a legal legal target for redistribution from "
6753 "the source graph (*this). This may mean that they do not have the "
6754 "same dimensions.");
6768 TEUCHOS_TEST_FOR_EXCEPTION(
6769 ! (reverseMode || getRowMap()->isSameAs(*rowTransfer.getSourceMap())),
6770 std::invalid_argument, prefix <<
6771 "rowTransfer->getSourceMap() must match this->getRowMap() in forward mode.");
6773 TEUCHOS_TEST_FOR_EXCEPTION(
6774 ! (! reverseMode || getRowMap()->isSameAs(*rowTransfer.getTargetMap())),
6775 std::invalid_argument, prefix <<
6776 "rowTransfer->getTargetMap() must match this->getRowMap() in reverse mode.");
6779 TEUCHOS_TEST_FOR_EXCEPTION(
6780 ! xferDomainAsImport.is_null() && ! xferDomainAsImport->getTargetMap()->isSameAs(*
domainMap),
6781 std::invalid_argument,
6782 prefix <<
"The target map of the 'domainTransfer' input argument must be "
6783 "the same as the rebalanced domain map 'domainMap'");
6785 TEUCHOS_TEST_FOR_EXCEPTION(
6786 ! xferDomainAsExport.is_null() && ! xferDomainAsExport->getSourceMap()->isSameAs(*
domainMap),
6787 std::invalid_argument,
6788 prefix <<
"The source map of the 'domainTransfer' input argument must be "
6789 "the same as the rebalanced domain map 'domainMap'");
6802 const size_t NumSameIDs = rowTransfer.getNumSameIDs();
6803 ArrayView<const LO> ExportLIDs = reverseMode ?
6804 rowTransfer.getRemoteLIDs() : rowTransfer.getExportLIDs();
6805 ArrayView<const LO> RemoteLIDs = reverseMode ?
6806 rowTransfer.getExportLIDs() : rowTransfer.getRemoteLIDs();
6807 ArrayView<const LO> PermuteToLIDs = reverseMode ?
6808 rowTransfer.getPermuteFromLIDs() : rowTransfer.getPermuteToLIDs();
6809 ArrayView<const LO> PermuteFromLIDs = reverseMode ?
6810 rowTransfer.getPermuteToLIDs() : rowTransfer.getPermuteFromLIDs();
6811 Distributor& Distor = rowTransfer.getDistributor();
6814 Teuchos::Array<int> SourcePids;
6815 Teuchos::Array<int> TargetPids;
6816 int MyPID = getComm()->getRank();
6819 RCP<const map_type> ReducedRowMap, ReducedColMap,
6820 ReducedDomainMap, ReducedRangeMap;
6821 RCP<const Comm<int> > ReducedComm;
6825 if (destGraph.is_null()) {
6826 destGraph = rcp(
new this_CRS_type(MyRowMap, 0, graphparams));
6833 ReducedRowMap = MyRowMap->removeEmptyProcesses();
6834 ReducedComm = ReducedRowMap.is_null() ?
6836 ReducedRowMap->getComm();
6837 destGraph->removeEmptyProcessesInPlace(ReducedRowMap);
6839 ReducedDomainMap = MyRowMap.getRawPtr() == MyDomainMap.getRawPtr() ?
6841 MyDomainMap->replaceCommWithSubset(ReducedComm);
6842 ReducedRangeMap = MyRowMap.getRawPtr() == MyRangeMap.getRawPtr() ?
6844 MyRangeMap->replaceCommWithSubset(ReducedComm);
6847 MyRowMap = ReducedRowMap;
6848 MyDomainMap = ReducedDomainMap;
6849 MyRangeMap = ReducedRangeMap;
6852 if (! ReducedComm.is_null()) {
6853 MyPID = ReducedComm->getRank();
6860 ReducedComm = MyRowMap->getComm();
6866#ifdef HAVE_TPETRA_MMM_TIMINGS
6868 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+
string(
"ImportSetup"))));
6871 RCP<const import_type> MyImporter = getImporter();
6874 bool bSameDomainMap = BaseDomainMap->isSameAs(*getDomainMap());
6876 if (! restrictComm && ! MyImporter.is_null() && bSameDomainMap ) {
6883 Import_Util::getPids(*MyImporter, SourcePids,
false);
6885 else if (restrictComm && ! MyImporter.is_null() && bSameDomainMap) {
6888 ivector_type SourceDomain_pids(getDomainMap(),
true);
6889 ivector_type SourceCol_pids(getColMap());
6891 SourceDomain_pids.putScalar(MyPID);
6893 SourceCol_pids.doImport(SourceDomain_pids, *MyImporter,
INSERT);
6894 SourcePids.resize(getColMap()->getLocalNumElements());
6895 SourceCol_pids.get1dCopy(SourcePids());
6897 else if (MyImporter.is_null() && bSameDomainMap) {
6899 SourcePids.resize(getColMap()->getLocalNumElements());
6900 SourcePids.assign(getColMap()->getLocalNumElements(), MyPID);
6902 else if ( ! MyImporter.is_null() &&
6903 ! domainTransfer.is_null() ) {
6910 ivector_type TargetDomain_pids(
domainMap);
6911 TargetDomain_pids.putScalar(MyPID);
6914 ivector_type SourceDomain_pids(getDomainMap());
6917 ivector_type SourceCol_pids(getColMap());
6919 if (! reverseMode && ! xferDomainAsImport.is_null() ) {
6920 SourceDomain_pids.doExport(TargetDomain_pids, *xferDomainAsImport,
INSERT);
6922 else if (reverseMode && ! xferDomainAsExport.is_null() ) {
6923 SourceDomain_pids.doExport(TargetDomain_pids, *xferDomainAsExport,
INSERT);
6925 else if (! reverseMode && ! xferDomainAsExport.is_null() ) {
6926 SourceDomain_pids.doImport(TargetDomain_pids, *xferDomainAsExport,
INSERT);
6928 else if (reverseMode && ! xferDomainAsImport.is_null() ) {
6929 SourceDomain_pids.doImport(TargetDomain_pids, *xferDomainAsImport,
INSERT);
6932 TEUCHOS_TEST_FOR_EXCEPTION(
6933 true, std::logic_error,
6934 prefix <<
"Should never get here! Please report this bug to a Tpetra developer.");
6936 SourceCol_pids.doImport(SourceDomain_pids, *MyImporter,
INSERT);
6937 SourcePids.resize(getColMap()->getLocalNumElements());
6938 SourceCol_pids.get1dCopy(SourcePids());
6940 else if (BaseDomainMap->isSameAs(*BaseRowMap) &&
6941 getDomainMap()->isSameAs(*getRowMap())) {
6944 ivector_type SourceRow_pids(getRowMap());
6945 ivector_type SourceCol_pids(getColMap());
6947 TargetRow_pids.putScalar(MyPID);
6948 if (! reverseMode && xferAsImport !=
nullptr) {
6949 SourceRow_pids.doExport(TargetRow_pids, *xferAsImport,
INSERT);
6951 else if (reverseMode && xferAsExport !=
nullptr) {
6952 SourceRow_pids.doExport(TargetRow_pids, *xferAsExport,
INSERT);
6954 else if (! reverseMode && xferAsExport !=
nullptr) {
6955 SourceRow_pids.doImport(TargetRow_pids, *xferAsExport,
INSERT);
6957 else if (reverseMode && xferAsImport !=
nullptr) {
6958 SourceRow_pids.doImport(TargetRow_pids, *xferAsImport,
INSERT);
6961 TEUCHOS_TEST_FOR_EXCEPTION(
6962 true, std::logic_error,
6963 prefix <<
"Should never get here! Please report this bug to a Tpetra developer.");
6965 SourceCol_pids.doImport(SourceRow_pids, *MyImporter,
INSERT);
6966 SourcePids.resize(getColMap()->getLocalNumElements());
6967 SourceCol_pids.get1dCopy(SourcePids());
6970 TEUCHOS_TEST_FOR_EXCEPTION(
6971 true, std::invalid_argument,
6972 prefix <<
"This method only allows either domainMap == getDomainMap(), "
6973 "or (domainMap == rowTransfer.getTargetMap() and getDomainMap() == getRowMap()).");
6977 size_t constantNumPackets = destGraph->constantNumberOfPackets();
6978 if (constantNumPackets == 0) {
6979 destGraph->reallocArraysForNumPacketsPerLid(ExportLIDs.size(),
6987 const size_t rbufLen = RemoteLIDs.size() * constantNumPackets;
6988 destGraph->reallocImportsIfNeeded(rbufLen,
false,
nullptr);
6993 destGraph->numExportPacketsPerLID_.modify_host();
6994 Teuchos::ArrayView<size_t> numExportPacketsPerLID =
6999 numExportPacketsPerLID, ExportLIDs,
7000 SourcePids, constantNumPackets);
7004#ifdef HAVE_TPETRA_MMM_TIMINGS
7006 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+
string(
"Transfer"))));
7009 if (communication_needed) {
7011 if (constantNumPackets == 0) {
7015 destGraph->numExportPacketsPerLID_.sync_host();
7016 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
7018 destGraph->numImportPacketsPerLID_.sync_host();
7019 Teuchos::ArrayView<size_t> numImportPacketsPerLID =
7022 Distor.doReversePostsAndWaits(destGraph->numExportPacketsPerLID_.view_host(), 1,
7023 destGraph->numImportPacketsPerLID_.view_host());
7024 size_t totalImportPackets = 0;
7026 totalImportPackets += numImportPacketsPerLID[i];
7031 destGraph->reallocImportsIfNeeded(totalImportPackets,
false,
nullptr);
7032 destGraph->imports_.modify_host();
7033 auto hostImports = destGraph->imports_.view_host();
7036 destGraph->exports_.sync_host();
7037 auto hostExports = destGraph->exports_.view_host();
7038 Distor.doReversePostsAndWaits(hostExports,
7039 numExportPacketsPerLID,
7041 numImportPacketsPerLID);
7044 destGraph->imports_.modify_host();
7045 auto hostImports = destGraph->imports_.view_host();
7048 destGraph->exports_.sync_host();
7049 auto hostExports = destGraph->exports_.view_host();
7050 Distor.doReversePostsAndWaits(hostExports,
7056 if (constantNumPackets == 0) {
7060 destGraph->numExportPacketsPerLID_.sync_host();
7061 destGraph->numImportPacketsPerLID_.sync_host();
7062 Distor.doPostsAndWaits(destGraph->numExportPacketsPerLID_.view_host(), 1,
7063 destGraph->numImportPacketsPerLID_.view_host());
7065 Teuchos::ArrayView<const size_t> numImportPacketsPerLID =
7067 size_t totalImportPackets = 0;
7069 totalImportPackets += numImportPacketsPerLID[i];
7074 destGraph->reallocImportsIfNeeded(totalImportPackets,
false,
nullptr);
7075 destGraph->imports_.modify_host();
7076 auto hostImports = destGraph->imports_.view_host();
7079 destGraph->exports_.sync_host();
7080 auto hostExports = destGraph->exports_.view_host();
7081 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
7083 Distor.doPostsAndWaits(hostExports, numExportPacketsPerLID, hostImports, numImportPacketsPerLID);
7086 destGraph->imports_.modify_host();
7087 auto hostImports = destGraph->imports_.view_host();
7090 destGraph->exports_.sync_host();
7091 auto hostExports = destGraph->exports_.view_host();
7092 Distor.doPostsAndWaits(hostExports, constantNumPackets, hostImports);
7101#ifdef HAVE_TPETRA_MMM_TIMINGS
7103 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+
string(
"Unpack-1"))));
7107 destGraph->numImportPacketsPerLID_.sync_host();
7108 Teuchos::ArrayView<const size_t> numImportPacketsPerLID =
7110 destGraph->imports_.sync_host();
7111 Teuchos::ArrayView<const packet_type> hostImports =
7115 numImportPacketsPerLID,
7116 constantNumPackets,
INSERT,
7117 NumSameIDs, PermuteToLIDs, PermuteFromLIDs);
7118 size_t N = BaseRowMap->getLocalNumElements();
7121 ArrayRCP<size_t> CSR_rowptr(N+1);
7122 ArrayRCP<GO> CSR_colind_GID;
7123 ArrayRCP<LO> CSR_colind_LID;
7124 CSR_colind_GID.resize(mynnz);
7128 if (
typeid(LO) ==
typeid(GO)) {
7129 CSR_colind_LID = Teuchos::arcp_reinterpret_cast<LO>(CSR_colind_GID);
7132 CSR_colind_LID.resize(mynnz);
7141 numImportPacketsPerLID, constantNumPackets,
7142 INSERT, NumSameIDs, PermuteToLIDs,
7143 PermuteFromLIDs, N, mynnz, MyPID,
7144 CSR_rowptr(), CSR_colind_GID(),
7145 SourcePids(), TargetPids);
7150#ifdef HAVE_TPETRA_MMM_TIMINGS
7152 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+
string(
"Unpack-2"))));
7157 Teuchos::Array<int> RemotePids;
7158 Import_Util::lowCommunicationMakeColMapAndReindex(CSR_rowptr(),
7162 TargetPids, RemotePids,
7169 ReducedColMap = (MyRowMap.getRawPtr() == MyColMap.getRawPtr()) ?
7171 MyColMap->replaceCommWithSubset(ReducedComm);
7172 MyColMap = ReducedColMap;
7176 destGraph->replaceColMap(MyColMap);
7183 if (ReducedComm.is_null()) {
7190 if ((! reverseMode && xferAsImport !=
nullptr) ||
7191 (reverseMode && xferAsExport !=
nullptr)) {
7192 Import_Util::sortCrsEntries(CSR_rowptr(),
7195 else if ((! reverseMode && xferAsExport !=
nullptr) ||
7196 (reverseMode && xferAsImport !=
nullptr)) {
7197 Import_Util::sortAndMergeCrsEntries(CSR_rowptr(),
7199 if (CSR_rowptr[N] != mynnz) {
7200 CSR_colind_LID.resize(CSR_rowptr[N]);
7204 TEUCHOS_TEST_FOR_EXCEPTION(
7205 true, std::logic_error,
7206 prefix <<
"Should never get here! Please report this bug to a Tpetra developer.");
7214 destGraph->setAllIndices(CSR_rowptr, CSR_colind_LID);
7220 Teuchos::ParameterList esfc_params;
7221#ifdef HAVE_TPETRA_MMM_TIMINGS
7223 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+
string(
"CreateImporter"))));
7225 RCP<import_type> MyImport = rcp(
new import_type(MyDomainMap, MyColMap, RemotePids));
7226#ifdef HAVE_TPETRA_MMM_TIMINGS
7228 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+
string(
"ESFC"))));
7230 esfc_params.set(
"Timer Label",prefix + std::string(
"TAFC"));
7232 if(!params.is_null())
7233 esfc_params.set(
"compute global constants",params->get(
"compute global constants",
true));
7235 destGraph->expertStaticFillComplete(MyDomainMap, MyRangeMap,
7236 MyImport, Teuchos::null, rcp(&esfc_params,
false));
7240 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7245 const Teuchos::RCP<const map_type>& domainMap,
7246 const Teuchos::RCP<const map_type>&
rangeMap,
7247 const Teuchos::RCP<Teuchos::ParameterList>&
params)
const
7252 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7258 const Teuchos::RCP<const map_type>& domainMap,
7259 const Teuchos::RCP<const map_type>&
rangeMap,
7260 const Teuchos::RCP<Teuchos::ParameterList>&
params)
const
7265 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7270 const Teuchos::RCP<const map_type>& domainMap,
7271 const Teuchos::RCP<const map_type>&
rangeMap,
7272 const Teuchos::RCP<Teuchos::ParameterList>&
params)
const
7277 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7283 const Teuchos::RCP<const map_type>& domainMap,
7284 const Teuchos::RCP<const map_type>&
rangeMap,
7285 const Teuchos::RCP<Teuchos::ParameterList>&
params)
const
// Member-wise exchange of state between `graph` and `*this`: every statement
// below is a std::swap of one data member of the two objects.
// NOTE(review): the declarator line naming this member function and its
// braces are not visible in this listing (presumably CrsGraph::swap) --
// confirm against the full file.
7291 template<
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7296 std::swap(
graph.need_sync_host_uvm_access,
this->need_sync_host_uvm_access);
// Row, column, range and domain maps.
7298 std::swap(
graph.rowMap_,
this->rowMap_);
7299 std::swap(
graph.colMap_,
this->colMap_);
7300 std::swap(
graph.rangeMap_,
this->rangeMap_);
7301 std::swap(
graph.domainMap_,
this->domainMap_);
// Importer / exporter handles.
7303 std::swap(
graph.importer_,
this->importer_);
7304 std::swap(
graph.exporter_,
this->exporter_);
// Packed row pointers (device and host members).
7306 std::swap(
graph.rowPtrsPacked_dev_,
this->rowPtrsPacked_dev_);
7307 std::swap(
graph.rowPtrsPacked_host_,
this->rowPtrsPacked_host_);
// Entry-count and allocation scalars.
7309 std::swap(
graph.nodeMaxNumRowEntries_,
this->nodeMaxNumRowEntries_);
7311 std::swap(
graph.globalNumEntries_,
this->globalNumEntries_);
7312 std::swap(
graph.globalMaxNumRowEntries_,
this->globalMaxNumRowEntries_);
7314 std::swap(
graph.numAllocForAllRows_,
this->numAllocForAllRows_);
// Unpacked row pointers (device and host members) and off-rank offsets.
7316 std::swap(
graph.rowPtrsUnpacked_dev_,
this->rowPtrsUnpacked_dev_);
7317 std::swap(
graph.rowPtrsUnpacked_host_,
this->rowPtrsUnpacked_host_);
7318 std::swap(
graph.k_offRankOffsets_,
this->k_offRankOffsets_);
// Column-index storage: unpacked local, global, and packed local.
7320 std::swap(
graph.lclIndsUnpacked_wdv,
this->lclIndsUnpacked_wdv);
7321 std::swap(
graph.gblInds_wdv,
this->gblInds_wdv);
7322 std::swap(
graph.lclIndsPacked_wdv,
this->lclIndsPacked_wdv);
7324 std::swap(
graph.storageStatus_,
this->storageStatus_);
// State flags.
7326 std::swap(
graph.indicesAreAllocated_,
this->indicesAreAllocated_);
7327 std::swap(
graph.indicesAreLocal_,
this->indicesAreLocal_);
7328 std::swap(
graph.indicesAreGlobal_,
this->indicesAreGlobal_);
7329 std::swap(
graph.fillComplete_,
this->fillComplete_);
7330 std::swap(
graph.indicesAreSorted_,
this->indicesAreSorted_);
7331 std::swap(
graph.noRedundancies_,
this->noRedundancies_);
7332 std::swap(
graph.haveLocalConstants_,
this->haveLocalConstants_);
7333 std::swap(
graph.haveGlobalConstants_,
this->haveGlobalConstants_);
7334 std::swap(
graph.haveLocalOffRankOffsets_,
this->haveLocalOffRankOffsets_);
7336 std::swap(
graph.sortGhostsAssociatedWithEachProcessor_,
this->sortGhostsAssociatedWithEachProcessor_);
// Per-row allocation / entry counts and the nonlocals_ container.
7338 std::swap(
graph.k_numAllocPerRow_,
this->k_numAllocPerRow_);
7339 std::swap(
graph.k_numRowEntries_,
this->k_numRowEntries_);
7340 std::swap(
graph.nonlocals_,
this->nonlocals_);
// Member-by-member comparison of `*this` against `graph`, accumulated in the
// local `output`. Each `output = (a == b) ? output : false;` statement leaves
// `output` untouched on a match and clears it on a mismatch, so once it is
// false it stays false.
// NOTE(review): the declarator line naming this member function, its braces,
// the declaration of `output`, and several loop bodies are not visible in this
// listing (presumably CrsGraph::isIdenticalTo) -- confirm against the full file.
7344 template<
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// v1 / v2 are copies (plain `auto`) of the values mapped to `key` in m1 and m2;
// sorting the copies leaves the maps themselves unmodified.
// NOTE(review): find(key) is dereferenced directly; the check that `key` is
// present in m2 is not visible in this listing -- confirm it exists.
7358 auto v1 =
m1.find(
key)->second;
7359 auto v2 =
m2.find(
key)->second;
7360 std::sort(
v1.begin(),
v1.end());
7361 std::sort(
v2.begin(),
v2.end());
// Scalar counts.
7380 output = this->nodeMaxNumRowEntries_ ==
graph.nodeMaxNumRowEntries_ ?
output :
false;
7383 output = this->globalMaxNumRowEntries_ ==
graph.globalMaxNumRowEntries_ ?
output :
false;
7385 output = this->numAllocForAllRows_ ==
graph.numAllocForAllRows_ ?
output :
false;
// State flags.
7389 output = this->indicesAreAllocated_ ==
graph.indicesAreAllocated_ ?
output :
false;
7395 output = this->haveLocalConstants_ ==
graph.haveLocalConstants_ ?
output :
false;
7396 output = this->haveGlobalConstants_ ==
graph.haveGlobalConstants_ ?
output :
false;
7397 output = this->haveLocalOffRankOffsets_ ==
graph.haveLocalOffRankOffsets_ ?
output :
false;
// Compare against graph's flag. The previous form compared this-> member with
// itself, which is always true and could never report a mismatch.
7398 output = this->sortGhostsAssociatedWithEachProcessor_ == graph.sortGhostsAssociatedWithEachProcessor_ ?
output :
false;
// For each array-like member: compare extents first; the element loop is only
// entered while `output` is still true and the extent is non-zero.
7406 output = this->k_numAllocPerRow_.extent(0) ==
graph.k_numAllocPerRow_.extent(0) ?
output :
false;
7407 if(
output && this->k_numAllocPerRow_.extent(0) > 0)
7409 for(
size_t i=0;
output &&
i<this->k_numAllocPerRow_.extent(0);
i++)
7415 output = this->k_numRowEntries_.extent(0) ==
graph.k_numRowEntries_.extent(0) ?
output :
false;
7416 if(
output && this->k_numRowEntries_.extent(0) > 0)
7418 for(
size_t i = 0;
output &&
i < this->k_numRowEntries_.extent(0);
i++)
7423 output = this->rowPtrsUnpacked_host_.extent(0) ==
graph.rowPtrsUnpacked_host_.extent(0) ?
output :
false;
7424 if(
output && this->rowPtrsUnpacked_host_.extent(0) > 0)
// Local column indices: read-only host views of both graphs' unpacked storage.
7433 output = this->lclIndsUnpacked_wdv.extent(0) ==
graph.lclIndsUnpacked_wdv.extent(0) ?
output :
false;
7434 if(
output && this->lclIndsUnpacked_wdv.extent(0) > 0)
7436 auto indThis = this->lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
7437 auto indGraph =
graph.lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
// Global column indices: read-only host views of both graphs' storage.
7443 output = this->gblInds_wdv.extent(0) ==
graph.gblInds_wdv.extent(0) ?
output :
false;
7444 if(
output && this->gblInds_wdv.extent(0) > 0)
7446 auto indtThis = this->gblInds_wdv.getHostView(Access::ReadOnly);
7447 auto indtGraph =
graph.gblInds_wdv.getHostView(Access::ReadOnly);
// Expands to the signature of the unqualified function
// importAndFillCompleteCrsGraph for CrsGraph<LO,GO,NODE>, single-Import form:
// (sourceGraph, importer, domainMap, rangeMap, params), returning
// Teuchos::RCP<CrsGraph<LO,GO,NODE> >.
// NOTE(review): the line between the #define and the return type is not
// visible in this listing (presumably a `template` keyword making this an
// explicit instantiation) -- confirm against the full file.
7520#define TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
7522 Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
7523 importAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
7524 const Import<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7525 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7526 CrsGraph<LO,GO,NODE>::node_type>& importer, \
7527 const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7528 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7529 CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
7530 const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7531 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7532 CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
7533 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Expands to the signature of the unqualified function
// importAndFillCompleteCrsGraph for CrsGraph<LO,GO,NODE>, two-Import form:
// (sourceGraph, rowImporter, domainImporter, domainMap, rangeMap, params),
// returning Teuchos::RCP<CrsGraph<LO,GO,NODE> >.
// NOTE(review): the line between the #define and the return type is not
// visible in this listing (presumably a `template` keyword making this an
// explicit instantiation) -- confirm against the full file.
7535#define TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE) \
7537 Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
7538 importAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
7539 const Import<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7540 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7541 CrsGraph<LO,GO,NODE>::node_type>& rowImporter, \
7542 const Import<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7543 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7544 CrsGraph<LO,GO,NODE>::node_type>& domainImporter, \
7545 const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7546 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7547 CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
7548 const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7549 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7550 CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
7551 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Expands to the signature of the unqualified function
// exportAndFillCompleteCrsGraph for CrsGraph<LO,GO,NODE>, single-Export form:
// (sourceGraph, exporter, domainMap, rangeMap, params), returning
// Teuchos::RCP<CrsGraph<LO,GO,NODE> >.
// NOTE(review): the line between the #define and the return type is not
// visible in this listing (presumably a `template` keyword making this an
// explicit instantiation) -- confirm against the full file.
7554#define TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
7556 Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
7557 exportAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
7558 const Export<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7559 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7560 CrsGraph<LO,GO,NODE>::node_type>& exporter, \
7561 const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7562 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7563 CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
7564 const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7565 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7566 CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
7567 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Expands to the signature of the unqualified function
// exportAndFillCompleteCrsGraph for CrsGraph<LO,GO,NODE>, two-Export form:
// (sourceGraph, rowExporter, domainExporter, domainMap, rangeMap, params),
// returning Teuchos::RCP<CrsGraph<LO,GO,NODE> >.
// NOTE(review): the line between the #define and the return type is not
// visible in this listing (presumably a `template` keyword making this an
// explicit instantiation) -- confirm against the full file.
7569#define TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE) \
7571 Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
7572 exportAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
7573 const Export<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7574 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7575 CrsGraph<LO,GO,NODE>::node_type>& rowExporter, \
7576 const Export<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7577 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7578 CrsGraph<LO,GO,NODE>::node_type>& domainExporter, \
7579 const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7580 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7581 CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
7582 const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7583 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7584 CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
7585 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Explicitly instantiates the CrsGraph<LO, GO, NODE> class template, then
// expands the four import/export *_AND_FILL_COMPLETE_INSTANT macros (single-
// and two-transfer forms) with the same (LO, GO, NODE) arguments.
7588#define TPETRA_CRSGRAPH_INSTANT( LO, GO, NODE ) \
7589 template class CrsGraph<LO, GO, NODE>; \
7590 TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
7591 TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
7592 TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE) \
7593 TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE)