Ifpack Package Browser (Single Doxygen Collection) Development
Loading...
Searching...
No Matches
SubdomainGraph_dh.c
Go to the documentation of this file.
1/*@HEADER
2// ***********************************************************************
3//
4// Ifpack: Object-Oriented Algebraic Preconditioner Package
5// Copyright (2002) Sandia Corporation
6//
7// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8// license for use of this work by or on behalf of the U.S. Government.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38//
39// ***********************************************************************
40//@HEADER
41*/
42
43#include "SubdomainGraph_dh.h"
44#include "getRow_dh.h"
45#include "Mem_dh.h"
46#include "Parser_dh.h"
47#include "Hash_i_dh.h"
48#include "mat_dh_private.h"
49#include "io_dh.h"
50#include "SortedSet_dh.h"
51#include "shellSort_dh.h"
52
53
54/* for debugging only! */
55#include <unistd.h>
56
57static void init_seq_private (SubdomainGraph_dh s, int blocks, bool bj,
58 void *A);
59static void init_mpi_private (SubdomainGraph_dh s, int blocks, bool bj,
60 void *A);
61/*
62static void partition_metis_private(SubdomainGraph_dh s, void *A);
63
64 grep for same below!
65*/
66static void allocate_storage_private (SubdomainGraph_dh s, int blocks, int m,
67 bool bj);
70 void *A);
72 void *A);
74 void *A);
75static void find_bdry_nodes_sym_private (SubdomainGraph_dh s, int m, void *A,
76 int *interiorNodes, int *bdryNodes,
77 int *interiorCount, int *bdryCount);
79 void *A, int *interiorNodes,
80 int *bdryNodes, int *interiorCount,
81 int *bdryCount);
82
83static void find_bdry_nodes_seq_private (SubdomainGraph_dh s, int m, void *A);
84 /* above also forms n2o[] and o2n[] */
85
89
90#undef __FUNC__
91#define __FUNC__ "SubdomainGraph_dhCreate"
92void
94{
96 struct _subdomain_dh *tmp =
97 (struct _subdomain_dh *) MALLOC_DH (sizeof (struct _subdomain_dh));
99 *s = tmp;
100
101 tmp->blocks = 1;
102 tmp->ptrs = tmp->adj = NULL;
103 tmp->colors = 1;
104 tmp->colorVec = NULL;
105 tmp->o2n_sub = tmp->n2o_sub = NULL;
106 tmp->beg_row = tmp->beg_rowP = NULL;
107 tmp->bdry_count = tmp->row_count = NULL;
108 tmp->loNabors = tmp->hiNabors = tmp->allNabors = NULL;
109 tmp->loCount = tmp->hiCount = tmp->allCount = 0;
110
111 tmp->m = 0;
112 tmp->n2o_row = tmp->o2n_col = NULL;
113 tmp->o2n_ext = tmp->n2o_ext = NULL;
114
115 tmp->doNotColor = Parser_dhHasSwitch (parser_dh, "-doNotColor");
116 tmp->debug = Parser_dhHasSwitch (parser_dh, "-debug_SubGraph");
117 {
118 int i;
119 for (i = 0; i < TIMING_BINS_SG; ++i)
120 tmp->timing[i] = 0.0;
121 }
123
124#undef __FUNC__
125#define __FUNC__ "SubdomainGraph_dhDestroy"
126void
128{
129 START_FUNC_DH if (s->ptrs != NULL)
130 {
131 FREE_DH (s->ptrs);
133 }
134 if (s->adj != NULL)
135 {
136 FREE_DH (s->adj);
138 }
139 if (s->colorVec != NULL)
140 {
141 FREE_DH (s->colorVec);
143 }
144 if (s->o2n_sub != NULL)
145 {
146 FREE_DH (s->o2n_sub);
148 }
149 if (s->n2o_sub != NULL)
150 {
151 FREE_DH (s->n2o_sub);
153 }
154
155 if (s->beg_row != NULL)
156 {
157 FREE_DH (s->beg_row);
159 }
160 if (s->beg_rowP != NULL)
161 {
162 FREE_DH (s->beg_rowP);
164 }
165 if (s->row_count != NULL)
166 {
167 FREE_DH (s->row_count);
169 }
170 if (s->bdry_count != NULL)
171 {
172 FREE_DH (s->bdry_count);
174 }
175 if (s->loNabors != NULL)
176 {
177 FREE_DH (s->loNabors);
179 }
180 if (s->hiNabors != NULL)
181 {
182 FREE_DH (s->hiNabors);
184 }
185 if (s->allNabors != NULL)
186 {
187 FREE_DH (s->allNabors);
189 }
190
191 if (s->n2o_row != NULL)
192 {
193 FREE_DH (s->n2o_row);
195 }
196 if (s->o2n_col != NULL)
197 {
198 FREE_DH (s->o2n_col);
200 }
201 if (s->o2n_ext != NULL)
202 {
205 }
206 if (s->n2o_ext != NULL)
207 {
210 }
211 FREE_DH (s);
214
215#undef __FUNC__
216#define __FUNC__ "SubdomainGraph_dhInit"
217void
219{
220 START_FUNC_DH double t1 = MPI_Wtime ();
221
222 if (blocks < 1)
223 blocks = 1;
224
225 if (np_dh == 1 || blocks > 1)
226 {
227 s->blocks = blocks;
228 init_seq_private (s, blocks, bj, A);
230 }
231 else
232 {
233 s->blocks = np_dh;
234 init_mpi_private (s, np_dh, bj, A);
236 }
237
238 s->timing[TOTAL_SGT] += (MPI_Wtime () - t1);
240
241
242#undef __FUNC__
243#define __FUNC__ "SubdomainGraph_dhFindOwner"
244int
246{
247 START_FUNC_DH int sd;
248 int *beg_row = s->beg_row;
249 int *row_count = s->row_count;
250 int owner = -1, blocks = s->blocks;
251
252 if (permuted)
253 beg_row = s->beg_rowP;
254
255 /* determine the subdomain that contains "idx" */
256 for (sd = 0; sd < blocks; ++sd)
257 {
258 if (idx >= beg_row[sd] && idx < beg_row[sd] + row_count[sd])
259 {
260 owner = sd;
261 break;
262 }
263 }
264
265 if (owner == -1)
266 {
267
268 fprintf (stderr, "@@@ failed to find owner for idx = %i @@@\n", idx);
269 fprintf (stderr, "blocks= %i\n", blocks);
270
271 sprintf (msgBuf_dh, "failed to find owner for idx = %i", idx);
272 SET_ERROR (-1, msgBuf_dh);
273 }
274
275END_FUNC_VAL (owner)}
276
277
278#undef __FUNC__
279#define __FUNC__ "SubdomainGraph_dhPrintStatsLong"
280void
282{
283 START_FUNC_DH int i, j, k;
284 double max = 0, min = INT_MAX;
285
286 fprintf (fp,
287 "\n------------- SubdomainGraph_dhPrintStatsLong -----------\n");
288 fprintf (fp, "colors used = %i\n", s->colors);
289 fprintf (fp, "subdomain count = %i\n", s->blocks);
290
291
292 fprintf (fp, "\ninterior/boundary node ratios:\n");
293
294 for (i = 0; i < s->blocks; ++i)
295 {
296 int inNodes = s->row_count[i] - s->bdry_count[i];
297 int bdNodes = s->bdry_count[i];
298 double ratio;
299
300 if (bdNodes == 0)
301 {
302 ratio = -1;
303 }
304 else
305 {
306 ratio = (double) inNodes / (double) bdNodes;
307 }
308
309 max = MAX (max, ratio);
310 min = MIN (min, ratio);
311 fprintf (fp,
312 " P_%i: first= %3i rowCount= %3i interior= %3i bdry= %3i ratio= %0.1f\n",
313 i, 1 + s->beg_row[i], s->row_count[i], inNodes, bdNodes,
314 ratio);
315 }
316
317
318 fprintf (fp, "\nmax interior/bdry ratio = %.1f\n", max);
319 fprintf (fp, "min interior/bdry ratio = %.1f\n", min);
320
321
322 /*-----------------------------------------
323 * subdomain graph
324 *-----------------------------------------*/
325 if (s->adj != NULL)
326 {
327 fprintf (fp, "\nunpermuted subdomain graph: \n");
328 for (i = 0; i < s->blocks; ++i)
329 {
330 fprintf (fp, "%i :: ", i);
331 for (j = s->ptrs[i]; j < s->ptrs[i + 1]; ++j)
332 {
333 fprintf (fp, "%i ", s->adj[j]);
334 }
335 fprintf (fp, "\n");
336 }
337 }
338
339
340 /*-----------------------------------------
341 * subdomain permutation
342 *-----------------------------------------*/
343 fprintf (fp, "\no2n subdomain permutation:\n");
344 for (i = 0; i < s->blocks; ++i)
345 {
346 fprintf (fp, " %i %i\n", i, s->o2n_sub[i]);
347 }
348 fprintf (fp, "\n");
349
350 if (np_dh > 1)
351 {
352
353 /*-----------------------------------------
354 * local n2o_row permutation
355 *-----------------------------------------*/
356 fprintf (fp, "\nlocal n2o_row permutation:\n ");
357 for (i = 0; i < s->row_count[myid_dh]; ++i)
358 {
359 fprintf (fp, "%i ", s->n2o_row[i]);
360 }
361 fprintf (fp, "\n");
362
363 /*-----------------------------------------
364 * local n2o permutation
365 *-----------------------------------------*/
366 fprintf (fp, "\nlocal o2n_col permutation:\n ");
367 for (i = 0; i < s->row_count[myid_dh]; ++i)
368 {
369 fprintf (fp, "%i ", s->o2n_col[i]);
370 }
371 fprintf (fp, "\n");
372
373 }
374 else
375 {
376 /*-----------------------------------------
377 * local n2o_row permutation
378 *-----------------------------------------*/
379 fprintf (fp, "\nlocal n2o_row permutation:\n");
380 fprintf (fp, "--------------------------\n");
381 for (k = 0; k < s->blocks; ++k)
382 {
383 int beg_row = s->beg_row[k];
384 int end_row = beg_row + s->row_count[k];
385
386 for (i = beg_row; i < end_row; ++i)
387 {
388 fprintf (fp, "%i ", s->n2o_row[i]);
389 }
390 fprintf (fp, "\n");
391 }
392
393 /*-----------------------------------------
394 * local n2o permutation
395 *-----------------------------------------*/
396 fprintf (fp, "\nlocal o2n_col permutation:\n");
397 fprintf (fp, "--------------------------\n");
398 for (k = 0; k < s->blocks; ++k)
399 {
400 int beg_row = s->beg_row[k];
401 int end_row = beg_row + s->row_count[k];
402
403 for (i = beg_row; i < end_row; ++i)
404 {
405 fprintf (fp, "%i ", s->o2n_col[i]);
406 }
407 fprintf (fp, "\n");
408 }
409
410
411 }
412
414
415#undef __FUNC__
416#define __FUNC__ "init_seq_private"
417void
418init_seq_private (SubdomainGraph_dh s, int blocks, bool bj, void *A)
419{
420 START_FUNC_DH int m, n, beg_row;
421 double t1;
422
423 /*-------------------------------------------------------
424 * get number of local rows (m), global rows (n), and
425 * global numbering of first locally owned row
426 * (for sequential, beg_row=0 and m == n
427 *-------------------------------------------------------*/
428 EuclidGetDimensions (A, &beg_row, &m, &n);
430 s->m = n;
431
432 /*-------------------------------------------------------
433 * allocate storage for all data structures
434 * EXCEPT s->adj and hash tables.
435 * (but note that hash tables aren't used for sequential)
436 *-------------------------------------------------------*/
439
440 /*-------------------------------------------------------------
441 * Fill in: beg_row[]
442 * beg_rowP[]
443 * row_count[]
444 * At this point, beg_rowP[] is a copy of beg_row[])
445 *-------------------------------------------------------------*/
446 {
447 int i;
448 int rpp = m / blocks;
449
450 if (rpp * blocks < m)
451 ++rpp;
452
453 s->beg_row[0] = 0;
454 for (i = 1; i < blocks; ++i)
455 s->beg_row[i] = rpp + s->beg_row[i - 1];
456 for (i = 0; i < blocks; ++i)
457 s->row_count[i] = rpp;
458 s->row_count[blocks - 1] = m - rpp * (blocks - 1);
459 }
460 memcpy (s->beg_rowP, s->beg_row, blocks * sizeof (int));
461
462
463 /*-----------------------------------------------------------------
464 * Find all neighboring processors in subdomain graph.
465 * This block fills in: allNabors[]
466 *-----------------------------------------------------------------*/
467 /* NA for sequential! */
468
469
470 /*-------------------------------------------------------
471 * Count number of interior nodes for each subdomain;
472 * also, form permutation vector to order boundary
473 * nodes last in each subdomain.
474 * This block fills in: bdry_count[]
475 * n2o_col[]
476 * o2n_row[]
477 *-------------------------------------------------------*/
478 t1 = MPI_Wtime ();
479 if (!bj)
480 {
483 }
484 else
485 {
486 int i;
487 for (i = 0; i < m; ++i)
488 {
489 s->n2o_row[i] = i;
490 s->o2n_col[i] = i;
491 }
492 }
493 s->timing[ORDER_BDRY_SGT] += (MPI_Wtime () - t1);
494
495 /*-------------------------------------------------------
496 * Form subdomain graph,
497 * then color and reorder subdomain graph.
498 * This block fills in: ptr[]
499 * adj[]
500 * o2n_sub[]
501 * n2o_sub[]
502 * beg_rowP[]
503 *-------------------------------------------------------*/
504 t1 = MPI_Wtime ();
505 if (!bj)
506 {
509 if (s->doNotColor)
510 {
511 int i;
512 printf_dh ("subdomain coloring and reordering is OFF\n");
513 for (i = 0; i < blocks; ++i)
514 {
515 s->o2n_sub[i] = i;
516 s->n2o_sub[i] = i;
517 s->colorVec[i] = 0;
518 }
519 }
520 else
521 {
522 SET_INFO ("subdomain coloring and reordering is ON");
525 }
526 }
527
528 /* bj setup */
529 else
530 {
531 int i;
532 for (i = 0; i < blocks; ++i)
533 {
534 s->o2n_sub[i] = i;
535 s->n2o_sub[i] = i;
536 }
537 }
538 s->timing[FORM_GRAPH_SGT] += (MPI_Wtime () - t1);
539
540 /*-------------------------------------------------------
541 * Here's a step we DON'T do for the parallel case:
542 * we need to adjust the matrix row and column perms
543 * to reflect subdomain reordering (for the parallel
544 * case, these permutation vectors are purely local and
545 * zero-based)
546 *-------------------------------------------------------*/
547 if (!bj)
548 {
551 }
552
553 /*-------------------------------------------------------
554 * Build lists of lower and higher ordered neighbors.
555 * This block fills in: loNabors[]
556 * hiNabors[]
557 *-------------------------------------------------------*/
558 /* NA for sequential */
559
560 /*-------------------------------------------------------
561 * Exchange boundary node permutation information with
562 * neighboring processors in the subdomain graph.
563 * This block fills in: o2n_ext (hash table)
564 * n2o_ext (hash table)
565 *-------------------------------------------------------*/
566 /* NA for sequential */
567
568
570
571
572#if 0
573#undef __FUNC__
574#define __FUNC__ "partition_metis_private"
575void
576partition_metis_private (SubdomainGraph_dh s, void *A)
577{
579 SET_V_ERROR ("not implemented");
581#endif
582
583#undef __FUNC__
584#define __FUNC__ "allocate_storage_private"
585void
587{
588 START_FUNC_DH if (!bj)
589 {
590 s->ptrs = (int *) MALLOC_DH ((blocks + 1) * sizeof (int));
592 s->ptrs[0] = 0;
593 s->colorVec = (int *) MALLOC_DH (blocks * sizeof (int));
595 s->loNabors = (int *) MALLOC_DH (np_dh * sizeof (int));
597 s->hiNabors = (int *) MALLOC_DH (np_dh * sizeof (int));
599 s->allNabors = (int *) MALLOC_DH (np_dh * sizeof (int));
601 }
602
603 s->n2o_row = (int *) MALLOC_DH (m * sizeof (int));
605 s->o2n_col = (int *) MALLOC_DH (m * sizeof (int));
607
608 /* these are probably only needed for single mpi task -- ?? */
609 /* nope; beg_row and row_ct are needed by ilu_mpi_bj; yuck! */
610 s->beg_row = (int *) MALLOC_DH ((blocks) * sizeof (int));
612 s->beg_rowP = (int *) MALLOC_DH ((blocks) * sizeof (int));
614 s->row_count = (int *) MALLOC_DH (blocks * sizeof (int));
616 s->bdry_count = (int *) MALLOC_DH (blocks * sizeof (int));
618 s->o2n_sub = (int *) MALLOC_DH (blocks * sizeof (int));
620 s->n2o_sub = (int *) MALLOC_DH (blocks * sizeof (int));
622
624
625/*-----------------------------------------------------------------*/
626
627
628#undef __FUNC__
629#define __FUNC__ "init_mpi_private"
630void
631init_mpi_private (SubdomainGraph_dh s, int blocks, bool bj, void *A)
632{
633 START_FUNC_DH int m, n, beg_row;
634 bool symmetric;
635 double t1;
636
637 symmetric = Parser_dhHasSwitch (parser_dh, "-sym");
639 if (Parser_dhHasSwitch (parser_dh, "-makeSymmetric"))
640 {
641 symmetric = true;
642 }
643
644 /*-------------------------------------------------------
645 * get number of local rows (m), global rows (n), and
646 * global numbering of first locally owned row
647 *-------------------------------------------------------*/
648 EuclidGetDimensions (A, &beg_row, &m, &n);
650 s->m = m;
651
652
653 /*-------------------------------------------------------
654 * allocate storage for all data structures
655 * EXCEPT s->adj and hash tables.
656 *-------------------------------------------------------*/
659
660 /*-------------------------------------------------------------
661 * Fill in: beg_row[]
662 * beg_rowP[]
663 * row_count[]
664 * At this point, beg_rowP[] is a copy of beg_row[])
665 *-------------------------------------------------------------*/
666 if (!bj)
667 {
668 MPI_Allgather (&beg_row, 1, MPI_INT, s->beg_row, 1, MPI_INT, comm_dh);
669 MPI_Allgather (&m, 1, MPI_INT, s->row_count, 1, MPI_INT, comm_dh);
670 memcpy (s->beg_rowP, s->beg_row, np_dh * sizeof (int));
671 }
672 else
673 {
674 s->beg_row[myid_dh] = beg_row;
676 s->row_count[myid_dh] = m;
677 }
678
679 /*-----------------------------------------------------------------
680 * Find all neighboring processors in subdomain graph.
681 * This block fills in: allNabors[]
682 *-----------------------------------------------------------------*/
683 if (!bj)
684 {
685 t1 = MPI_Wtime ();
686 if (symmetric)
687 {
690 }
691 else
692 {
695 }
696 s->timing[FIND_NABORS_SGT] += (MPI_Wtime () - t1);
697 }
698
699
700 /*-----------------------------------------------------------------
701 * determine which rows are boundary rows, and which are interior
702 * rows; also, form permutation vector to order interior
703 * nodes first within each subdomain
704 * This block fills in: bdry_count[]
705 * n2o_col[]
706 * o2n_row[]
707 *-----------------------------------------------------------------*/
708 t1 = MPI_Wtime ();
709 if (!bj)
710 {
711 int *interiorNodes, *bdryNodes;
712 int interiorCount, bdryCount;
713 int *o2n = s->o2n_col, idx;
714 int i;
715
716 interiorNodes = (int *) MALLOC_DH (m * sizeof (int));
718 bdryNodes = (int *) MALLOC_DH (m * sizeof (int));
720
721 /* divide this subdomain's rows into interior and boundary rows;
722 the returned lists are with respect to local numbering.
723 */
724 if (symmetric)
725 {
727 interiorNodes, bdryNodes,
728 &interiorCount, &bdryCount);
730 }
731 else
732 {
734 interiorNodes, bdryNodes,
735 &interiorCount, &bdryCount);
737 }
738
739 /* exchange number of boundary rows with all neighbors */
740 MPI_Allgather (&bdryCount, 1, MPI_INT, s->bdry_count, 1, MPI_INT,
741 comm_dh);
742
743 /* form local permutation */
744 idx = 0;
745 for (i = 0; i < interiorCount; ++i)
746 {
747 o2n[interiorNodes[i]] = idx++;
748 }
749 for (i = 0; i < bdryCount; ++i)
750 {
751 o2n[bdryNodes[i]] = idx++;
752 }
753
754 /* invert permutation */
755 invert_perm (m, o2n, s->n2o_row);
757
758 FREE_DH (interiorNodes);
760 FREE_DH (bdryNodes);
762 }
763
764 /* bj setup */
765 else
766 {
767 int *o2n = s->o2n_col, *n2o = s->n2o_row;
768 int i, m = s->m;
769
770 for (i = 0; i < m; ++i)
771 {
772 o2n[i] = i;
773 n2o[i] = i;
774 }
775 }
776 s->timing[ORDER_BDRY_SGT] += (MPI_Wtime () - t1);
777
778 /*-------------------------------------------------------
779 * Form subdomain graph,
780 * then color and reorder subdomain graph.
781 * This block fills in: ptr[]
782 * adj[]
783 * o2n_sub[]
784 * n2o_sub[]
785 * beg_rowP[]
786 *-------------------------------------------------------*/
787 if (!bj)
788 {
789 t1 = MPI_Wtime ();
792 if (s->doNotColor)
793 {
794 int i;
795 printf_dh ("subdomain coloring and reordering is OFF\n");
796 for (i = 0; i < blocks; ++i)
797 {
798 s->o2n_sub[i] = i;
799 s->n2o_sub[i] = i;
800 s->colorVec[i] = 0;
801 }
802 }
803 else
804 {
805 SET_INFO ("subdomain coloring and reordering is ON");
808 }
809 s->timing[FORM_GRAPH_SGT] += (MPI_Wtime () - t1);
810 }
811
812 /*-------------------------------------------------------
813 * Build lists of lower and higher ordered neighbors.
814 * This block fills in: loNabors[]
815 * hiNabors[]
816 *-------------------------------------------------------*/
817 if (!bj)
818 {
821 }
822
823 /*-------------------------------------------------------
824 * Exchange boundary node permutation information with
825 * neighboring processors in the subdomain graph.
826 * This block fills in: o2n_ext (hash table)
827 * n2o_ext (hash table)
828 *-------------------------------------------------------*/
829 if (!bj)
830 {
831 t1 = MPI_Wtime ();
834 s->timing[EXCHANGE_PERMS_SGT] += (MPI_Wtime () - t1);
835 }
836
838
839
840
841#undef __FUNC__
842#define __FUNC__ "SubdomainGraph_dhExchangePerms"
843void
845{
846 START_FUNC_DH MPI_Request * recv_req = NULL, *send_req = NULL;
847 MPI_Status *status = NULL;
848 int *nabors = s->allNabors, naborCount = s->allCount;
849 int i, j, *sendBuf = NULL, *recvBuf = NULL, *naborIdx = NULL, nz;
850 int m = s->row_count[myid_dh];
851 int beg_row = s->beg_row[myid_dh];
852 int beg_rowP = s->beg_rowP[myid_dh];
853 int *bdryNodeCounts = s->bdry_count;
854 int myBdryCount = s->bdry_count[myid_dh];
855 bool debug = false;
856 int myFirstBdry = m - myBdryCount;
857 int *n2o_row = s->n2o_row;
858 Hash_i_dh n2o_table, o2n_table;
859
860 if (logFile != NULL && s->debug)
861 debug = true;
862
863 /* allocate send buffer, and copy permutation info to buffer;
864 each entry is a <old_value, new_value> pair.
865 */
866 sendBuf = (int *) MALLOC_DH (2 * myBdryCount * sizeof (int));
868
869
870 if (debug)
871 {
872 fprintf (logFile,
873 "\nSUBG myFirstBdry= %i myBdryCount= %i m= %i beg_rowP= %i\n",
874 1 + myFirstBdry, myBdryCount, m, 1 + beg_rowP);
875 fflush (logFile);
876 }
877
878 for (i = myFirstBdry, j = 0; j < myBdryCount; ++i, ++j)
879 {
880 sendBuf[2 * j] = n2o_row[i] + beg_row;
881 sendBuf[2 * j + 1] = i + beg_rowP;
882 }
883
884 if (debug)
885 {
886 fprintf (logFile, "\nSUBG SEND_BUF:\n");
887 for (i = myFirstBdry, j = 0; j < myBdryCount; ++i, ++j)
888 {
889 fprintf (logFile, "SUBG %i, %i\n", 1 + sendBuf[2 * j],
890 1 + sendBuf[2 * j + 1]);
891 }
892 fflush (logFile);
893 }
894
895 /* allocate a receive buffer for each nabor in the subdomain graph,
896 and set up index array for locating the beginning of each
897 nabor's buffers.
898 */
899 naborIdx = (int *) MALLOC_DH ((1 + naborCount) * sizeof (int));
901 naborIdx[0] = 0;
902 nz = 0;
903 for (i = 0; i < naborCount; ++i)
904 {
905 nz += (2 * bdryNodeCounts[nabors[i]]);
906 naborIdx[i + 1] = nz;
907 }
908
909
910 recvBuf = (int *) MALLOC_DH (nz * sizeof (int));
912
913
914/* for (i=0; i<nz; ++i) recvBuf[i] = -10; */
915
916 /* perform sends and receives */
917 recv_req = (MPI_Request *) MALLOC_DH (naborCount * sizeof (MPI_Request));
919 send_req = (MPI_Request *) MALLOC_DH (naborCount * sizeof (MPI_Request));
921 status = (MPI_Status *) MALLOC_DH (naborCount * sizeof (MPI_Status));
923
924 for (i = 0; i < naborCount; ++i)
925 {
926 int nabr = nabors[i];
927 int *buf = recvBuf + naborIdx[i];
928 int ct = 2 * bdryNodeCounts[nabr];
929
930
931 MPI_Isend (sendBuf, 2 * myBdryCount, MPI_INT, nabr, 444, comm_dh,
932 &(send_req[i]));
933
934 if (debug)
935 {
936 fprintf (logFile, "SUBG sending %i elts to %i\n", 2 * myBdryCount,
937 nabr);
938 fflush (logFile);
939 }
940
941 MPI_Irecv (buf, ct, MPI_INT, nabr, 444, comm_dh, &(recv_req[i]));
942
943 if (debug)
944 {
945 fprintf (logFile, "SUBG receiving %i elts from %i\n", ct, nabr);
946 fflush (logFile);
947 }
948 }
949
950 MPI_Waitall (naborCount, send_req, status);
951 MPI_Waitall (naborCount, recv_req, status);
952
953 Hash_i_dhCreate (&n2o_table, nz / 2);
955 Hash_i_dhCreate (&o2n_table, nz / 2);
957 s->n2o_ext = n2o_table;
958 s->o2n_ext = o2n_table;
959
960 /* insert non-local boundary node permutations in lookup tables */
961 for (i = 0; i < nz; i += 2)
962 {
963 int old = recvBuf[i];
964 int new = recvBuf[i + 1];
965
966 if (debug)
967 {
968 fprintf (logFile, "SUBG i= %i old= %i new= %i\n", i, old + 1,
969 new + 1);
970 fflush (logFile);
971 }
972
973 Hash_i_dhInsert (o2n_table, old, new);
975 Hash_i_dhInsert (n2o_table, new, old);
977 }
978
979
980 if (recvBuf != NULL)
981 {
982 FREE_DH (recvBuf);
984 }
985 if (naborIdx != NULL)
986 {
987 FREE_DH (naborIdx);
989 }
990 if (sendBuf != NULL)
991 {
992 FREE_DH (sendBuf);
994 }
995 if (recv_req != NULL)
996 {
997 FREE_DH (recv_req);
999 }
1000 if (send_req != NULL)
1001 {
1002 FREE_DH (send_req);
1004 }
1005 if (status != NULL)
1006 {
1007 FREE_DH (status);
1009 }
1010
1012
1013
1014
1015#undef __FUNC__
1016#define __FUNC__ "form_subdomaingraph_mpi_private"
1017void
1019{
1020 START_FUNC_DH int *nabors = s->allNabors, nct = s->allCount;
1021 int *idxAll = NULL;
1022 int i, j, nz, *adj, *ptrs = s->ptrs;
1023 MPI_Request *recvReqs = NULL, sendReq;
1024 MPI_Status *statuses = NULL, status;
1025
1026 /* all processors tell root how many nabors they have */
1027 if (myid_dh == 0)
1028 {
1029 idxAll = (int *) MALLOC_DH (np_dh * sizeof (int));
1031 }
1032 MPI_Gather (&nct, 1, MPI_INT, idxAll, 1, MPI_INT, 0, comm_dh);
1033
1034 /* root counts edges in graph, and broacasts to all */
1035 if (myid_dh == 0)
1036 {
1037 nz = 0;
1038 for (i = 0; i < np_dh; ++i)
1039 nz += idxAll[i];
1040 }
1041 MPI_Bcast (&nz, 1, MPI_INT, 0, comm_dh);
1042
1043 /* allocate space for adjacency lists (memory for the
1044 pointer array was previously allocated)
1045 */
1046 adj = s->adj = (int *) MALLOC_DH (nz * sizeof (int));
1048
1049 /* root receives adjacency lists from all processors */
1050 if (myid_dh == 0)
1051 {
1052 recvReqs = (MPI_Request *) MALLOC_DH (np_dh * sizeof (MPI_Request));
1054 statuses = (MPI_Status *) MALLOC_DH (np_dh * sizeof (MPI_Status));
1056
1057 /* first, set up row pointer array */
1058 ptrs[0] = 0;
1059 for (j = 0; j < np_dh; ++j)
1060 ptrs[j + 1] = ptrs[j] + idxAll[j];
1061
1062 /* second, start the receives */
1063 for (j = 0; j < np_dh; ++j)
1064 {
1065 int ct = idxAll[j];
1066
1067 MPI_Irecv (adj + ptrs[j], ct, MPI_INT, j, 42, comm_dh,
1068 recvReqs + j);
1069 }
1070 }
1071
1072 /* all processors send root their adjacency list */
1073 MPI_Isend (nabors, nct, MPI_INT, 0, 42, comm_dh, &sendReq);
1074
1075 /* wait for comms to go through */
1076 if (myid_dh == 0)
1077 {
1078 MPI_Waitall (np_dh, recvReqs, statuses);
1079 }
1080 MPI_Wait (&sendReq, &status);
1081
1082 /* root broadcasts assembled subdomain graph to all processors */
1083 MPI_Bcast (ptrs, 1 + np_dh, MPI_INT, 0, comm_dh);
1084 MPI_Bcast (adj, nz, MPI_INT, 0, comm_dh);
1085
1086 if (idxAll != NULL)
1087 {
1088 FREE_DH (idxAll);
1090 }
1091 if (recvReqs != NULL)
1092 {
1093 FREE_DH (recvReqs);
1095 }
1096 if (statuses != NULL)
1097 {
1098 FREE_DH (statuses);
1100 }
1101
1103
1104/* this is ugly and inefficient; but seq mode is primarily
1105 for debugging and testing, so there.
1106*/
1107#undef __FUNC__
1108#define __FUNC__ "form_subdomaingraph_seq_private"
1109void
1111{
1112 START_FUNC_DH int *dense, i, j, row, blocks = s->blocks;
1113 int *cval, len, *adj;
1114 int idx = 0, *ptrs = s->ptrs;
1115
1116 /* allocate storage for adj[]; since this function is intended
1117 for debugging/testing, and the number of blocks should be
1118 relatively small, we'll punt and allocate the maximum
1119 possibly needed.
1120 */
1121 adj = s->adj = (int *) MALLOC_DH (blocks * blocks * sizeof (int));
1123
1124 dense = (int *) MALLOC_DH (blocks * blocks * sizeof (int));
1126 for (i = 0; i < blocks * blocks; ++i)
1127 dense[i] = 0;
1128
1129 /* loop over each block's rows to identify all boundary nodes */
1130 for (i = 0; i < blocks; ++i)
1131 {
1132 int beg_row = s->beg_row[i];
1133 int end_row = beg_row + s->row_count[i];
1134
1135 for (row = beg_row; row < end_row; ++row)
1136 {
1137 EuclidGetRow (A, row, &len, &cval, NULL);
1139 for (j = 0; j < len; ++j)
1140 {
1141 int col = cval[j];
1142 if (col < beg_row || col >= end_row)
1143 {
1144 int owner = SubdomainGraph_dhFindOwner (s, col, false);
1146 dense[i * blocks + owner] = 1;
1147 dense[owner * blocks + i] = 1;
1148 }
1149 }
1150 EuclidRestoreRow (A, row, &len, &cval, NULL);
1152 }
1153 }
1154
1155 /* form sparse csr representation of subdomain graph
1156 from dense representation
1157 */
1158 ptrs[0] = 0;
1159 for (i = 0; i < blocks; ++i)
1160 {
1161 for (j = 0; j < blocks; ++j)
1162 {
1163 if (dense[i * blocks + j])
1164 {
1165 adj[idx++] = j;
1166 }
1167 }
1168 ptrs[i + 1] = idx;
1169 }
1170
1171 FREE_DH (dense);
1174
1175
1176#undef __FUNC__
1177#define __FUNC__ "find_all_neighbors_sym_private"
1178void
1180{
1181 START_FUNC_DH int *marker, i, j, beg_row, end_row;
1182 int row, len, *cval, ct = 0;
1183 int *nabors = s->allNabors;
1184
1185 marker = (int *) MALLOC_DH (m * sizeof (int));
1187 for (i = 0; i < m; ++i)
1188 marker[i] = 0;
1189
1190 SET_INFO
1191 ("finding nabors in subdomain graph for structurally symmetric matrix");
1192 SET_INFO ("(if this isn't what you want, use '-sym 0' switch)");
1193
1194 beg_row = s->beg_row[myid_dh];
1195 end_row = beg_row + s->row_count[myid_dh];
1196
1197 for (row = beg_row; row < end_row; ++row)
1198 {
1199 EuclidGetRow (A, row, &len, &cval, NULL);
1201 for (j = 0; j < len; ++j)
1202 {
1203 int col = cval[j];
1204 if (col < beg_row || col >= end_row)
1205 {
1206 int owner = SubdomainGraph_dhFindOwner (s, col, false);
1208 if (!marker[owner])
1209 {
1210 marker[owner] = 1;
1211 nabors[ct++] = owner;
1212 }
1213 }
1214 }
1215 EuclidRestoreRow (A, row, &len, &cval, NULL);
1217 }
1218 s->allCount = ct;
1219
1220/* fprintf(logFile, "@@@@@ allCount= %i\n", ct); */
1221
1222 if (marker != NULL)
1223 {
1224 FREE_DH (marker);
1226 }
1228
1229#undef __FUNC__
1230#define __FUNC__ "find_all_neighbors_unsym_private"
1231void
1233{
1234 START_FUNC_DH int i, j, row, beg_row, end_row;
1235 int *marker;
1236 int *cval, len, idx = 0;
1237 int nz, *nabors = s->allNabors, *myNabors;
1238
1239 myNabors = (int *) MALLOC_DH (np_dh * sizeof (int));
1241 marker = (int *) MALLOC_DH (np_dh * sizeof (int));
1243 for (i = 0; i < np_dh; ++i)
1244 marker[i] = 0;
1245
1246 SET_INFO
1247 ("finding nabors in subdomain graph for structurally unsymmetric matrix");
1248
1249 /* loop over this block's boundary rows, finding all nabors in
1250 subdomain graph
1251 */
1252 beg_row = s->beg_row[myid_dh];
1253 end_row = beg_row + s->row_count[myid_dh];
1254
1255
1256
1257 /*for each locally owned row ... */
1258 for (row = beg_row; row < end_row; ++row)
1259 {
1260 EuclidGetRow (A, row, &len, &cval, NULL);
1262 for (j = 0; j < len; ++j)
1263 {
1264 int col = cval[j];
1265 /*for each column that corresponds to a non-locally owned row ... */
1266 if (col < beg_row || col >= end_row)
1267 {
1268 int owner = SubdomainGraph_dhFindOwner (s, col, false);
1270 /*if I've not yet done so ... */
1271 if (!marker[owner])
1272 {
1273 marker[owner] = 1;
1274 /*append the non-local row's owner in to the list of my nabors
1275 in the subdomain graph */
1276 myNabors[idx++] = owner;
1277 }
1278 }
1279 }
1280 EuclidRestoreRow (A, row, &len, &cval, NULL);
1282 }
1283
1284 /*
1285 at this point, idx = the number of my neighbors in the subdomain
1286 graph; equivalently, idx is the number of meaningfull slots in
1287 the myNabors array. -dah 1/31/06
1288 */
1289
1290 /*
1291 at this point: marker[j] = 0 indicates that processor j is NOT my nabor
1292 marker[j] = 1 indicates that processor j IS my nabor
1293 however, there may be some nabors that can't be discovered in the above loop
1294 "//for each locally owned row;" this can happen if the matrix is
1295 structurally unsymmetric.
1296 -dah 1/31/06
1297 */
1298
1299/* fprintf(stderr, "[%i] marker: ", myid_dh);
1300for (j=0; j<np_dh; j++) {
1301 fprintf(stderr, "[%i] (j=%d) %d\n", myid_dh, j, marker[j]);
1302}
1303fprintf(stderr, "\n");
1304*/
1305
1306 /* find out who my neighbors are that I cannot discern locally */
1307 MPI_Alltoall (marker, 1, MPI_INT, nabors, 1, MPI_INT, comm_dh);
1309
1310 /* add in neighbors that I know about from scanning my adjacency lists */
1311 for (i = 0; i < idx; ++i)
1312 nabors[myNabors[i]] = 1;
1313
1314 /* remove self from the adjacency list */
1315 nabors[myid_dh] = 0;
1316
1317 /*
1318 at this point: marker[j] = 0 indicates that processor j is NOT my nabor
1319 marker[j] = 1 indicates that processor j IS my nabor
1320 and this is guaranteed to be complete.
1321 */
1322
1323 /* form final list of neighboring processors */
1324 nz = 0;
1325 for (i = 0; i < np_dh; ++i)
1326 {
1327 if (nabors[i])
1328 myNabors[nz++] = i;
1329 }
1330 s->allCount = nz;
1331 memcpy (nabors, myNabors, nz * sizeof (int));
1332
1333 if (marker != NULL)
1334 {
1335 FREE_DH (marker);
1337 }
1338 if (myNabors != NULL)
1339 {
1340 FREE_DH (myNabors);
1342 }
1344
1345/*================================================================*/
1346
1347#undef __FUNC__
1348#define __FUNC__ "find_bdry_nodes_sym_private"
1349void
1351 int *interiorNodes, int *bdryNodes,
1352 int *interiorCount, int *bdryCount)
1353{
1355 int end_row = beg_row + s->row_count[myid_dh];
1356 int row, inCt = 0, bdCt = 0;
1357
1358 int j;
1359 int *cval;
1360
1361 /* determine if the row is a boundary row */
1362 for (row = beg_row; row < end_row; ++row)
1363 { /* for each row in the subdomain */
1364 bool isBdry = false;
1365 int len;
1366 EuclidGetRow (A, row, &len, &cval, NULL);
1368
1369 for (j = 0; j < len; ++j)
1370 { /* for each column in the row */
1371 int col = cval[j];
1372 if (col < beg_row || col >= end_row)
1373 {
1374 isBdry = true;
1375 break;
1376 }
1377 }
1378 EuclidRestoreRow (A, row, &len, &cval, NULL);
1380
1381 if (isBdry)
1382 {
1383 bdryNodes[bdCt++] = row - beg_row;
1384 }
1385 else
1386 {
1387 interiorNodes[inCt++] = row - beg_row;
1388 }
1389 }
1390
1391 *interiorCount = inCt;
1392 *bdryCount = bdCt;
1393
1395
1396#define BDRY_NODE_TAG 42
1397
1398#undef __FUNC__
1399#define __FUNC__ "find_bdry_nodes_unsym_private"
1400void
1402 int *interiorNodes, int *boundaryNodes,
1403 int *interiorCount, int *bdryCount)
1404{
1406 int end_row = beg_row + s->row_count[myid_dh];
1407 int i, j, row, max;
1408 int *cval;
1409 int *list, count;
1410 int *rpIN = NULL, *rpOUT = NULL;
1411 int *sendBuf, *recvBuf;
1412 int *marker, inCt, bdCt;
1413 int *bdryNodes, nz;
1414 int sendCt, recvCt;
1415 MPI_Request *sendReq, *recvReq;
1416 MPI_Status *status;
1417 SortedSet_dh ss;
1418
1419 SortedSet_dhCreate (&ss, m);
1421
1422 /*-----------------------------------------------------
1423 * identify all boundary nodes possible using locally
1424 * owned adjacency lists
1425 *-----------------------------------------------------*/
1426 for (row = beg_row; row < end_row; ++row)
1427 {
1428 bool isBdry = false;
1429 int len;
1430 EuclidGetRow (A, row, &len, &cval, NULL);
1432
1433 for (j = 0; j < len; ++j)
1434 {
1435 int col = cval[j];
1436 if (col < beg_row || col >= end_row)
1437 {
1438 isBdry = true; /* this row is a boundary node */
1439 SortedSet_dhInsert (ss, col);
1441 /* the row "col" is also a boundary node */
1442 }
1443 }
1444 EuclidRestoreRow (A, row, &len, &cval, NULL);
1446
1447 if (isBdry)
1448 {
1449 SortedSet_dhInsert (ss, row);
1451 }
1452 }
1453
1454 /*-----------------------------------------------------
1455 * scan the sorted list to determine what boundary
1456 * node information to send to whom
1457 *-----------------------------------------------------*/
1458 sendBuf = (int *) MALLOC_DH (np_dh * sizeof (int));
1460 recvBuf = (int *) MALLOC_DH (np_dh * sizeof (int));
1462 rpOUT = (int *) MALLOC_DH ((np_dh + 1) * sizeof (int));
1464 rpOUT[0] = 0;
1465 for (i = 0; i < np_dh; ++i)
1466 sendBuf[i] = 0;
1467
1468 sendCt = 0; /* total number of processor to whom we will send lists */
1469 SortedSet_dhGetList (ss, &list, &count);
1471
1472 for (i = 0; i < count; /* i is set below */ )
1473 {
1474 int node = list[i];
1475 int owner;
1476 int last;
1477
1478 owner = SubdomainGraph_dhFindOwner (s, node, false);
1480 last = s->beg_row[owner] + s->row_count[owner];
1481
1482 /* determine the other boundary nodes that belong to owner */
1483 while ((i < count) && (list[i] < last))
1484 ++i;
1485 ++sendCt;
1486 rpOUT[sendCt] = i;
1487 sendBuf[owner] = rpOUT[sendCt] - rpOUT[sendCt - 1];
1488
1489 }
1490
1491 /*-----------------------------------------------------
1492 * processors tell each other how much information
1493 * each will send to whom
1494 *-----------------------------------------------------*/
1495 MPI_Alltoall (sendBuf, 1, MPI_INT, recvBuf, 1, MPI_INT, comm_dh);
1497
1498 /*-----------------------------------------------------
1499 * exchange boundary node information
1500 * (note that we also exchange information with ourself!)
1501 *-----------------------------------------------------*/
1502
1503 /* first, set up data structures to hold incoming information */
1504 rpIN = (int *) MALLOC_DH ((np_dh + 1) * sizeof (int));
1506 rpIN[0] = 0;
1507 nz = 0;
1508 recvCt = 0;
1509 for (i = 0; i < np_dh; ++i)
1510 {
1511 if (recvBuf[i])
1512 {
1513 ++recvCt;
1514 nz += recvBuf[i];
1515 rpIN[recvCt] = nz;
1516 }
1517 }
1518 bdryNodes = (int *) MALLOC_DH (nz * sizeof (int));
1520 sendReq = (MPI_Request *) MALLOC_DH (sendCt * sizeof (MPI_Request));
1522 recvReq = (MPI_Request *) MALLOC_DH (recvCt * sizeof (MPI_Request));
1524 max = MAX (sendCt, recvCt);
1525 status = (MPI_Status *) MALLOC_DH (max * sizeof (MPI_Status));
1527
1528 /* second, start receives for incoming data */
1529 j = 0;
1530 for (i = 0; i < np_dh; ++i)
1531 {
1532 if (recvBuf[i])
1533 {
1534 MPI_Irecv (bdryNodes + rpIN[j], recvBuf[i], MPI_INT,
1535 i, BDRY_NODE_TAG, comm_dh, recvReq + j);
1536 ++j;
1537 }
1538 }
1539
1540 /* third, start sends for outgoing data */
1541 j = 0;
1542 for (i = 0; i < np_dh; ++i)
1543 {
1544 if (sendBuf[i])
1545 {
1546 MPI_Isend (list + rpOUT[j], sendBuf[i], MPI_INT,
1547 i, BDRY_NODE_TAG, comm_dh, sendReq + j);
1548 ++j;
1549 }
1550 }
1551
1552 /* fourth, wait for all comms to finish */
1553 MPI_Waitall (sendCt, sendReq, status);
1554 MPI_Waitall (recvCt, recvReq, status);
1555
1556 /* fifth, convert from global to local indices */
1557 for (i = 0; i < nz; ++i)
1558 bdryNodes[i] -= beg_row;
1559
1560 /*-----------------------------------------------------
1561 * consolidate information from all processors to
1562 * identify all local boundary nodes
1563 *-----------------------------------------------------*/
1564 marker = (int *) MALLOC_DH (m * sizeof (int));
1566 for (i = 0; i < m; ++i)
1567 marker[i] = 0;
1568 for (i = 0; i < nz; ++i)
1569 marker[bdryNodes[i]] = 1;
1570
1571 inCt = bdCt = 0;
1572 for (i = 0; i < m; ++i)
1573 {
1574 if (marker[i])
1575 {
1576 boundaryNodes[bdCt++] = i;
1577 }
1578 else
1579 {
1580 interiorNodes[inCt++] = i;
1581 }
1582 }
1583 *interiorCount = inCt;
1584 *bdryCount = bdCt;
1585
1586 /*-----------------------------------------------------
1587 * clean up
1588 *-----------------------------------------------------*/
1591 if (rpIN != NULL)
1592 {
1593 FREE_DH (rpIN);
1595 }
1596 if (rpOUT != NULL)
1597 {
1598 FREE_DH (rpOUT);
1600 }
1601 if (sendBuf != NULL)
1602 {
1603 FREE_DH (sendBuf);
1605 }
1606 if (recvBuf != NULL)
1607 {
1608 FREE_DH (recvBuf);
1610 }
1611 if (bdryNodes != NULL)
1612 {
1613 FREE_DH (bdryNodes);
1615 }
1616 if (marker != NULL)
1617 {
1618 FREE_DH (marker);
1620 }
1621 if (sendReq != NULL)
1622 {
1623 FREE_DH (sendReq);
1625 }
1626 if (recvReq != NULL)
1627 {
1628 FREE_DH (recvReq);
1630 }
1631 if (status != NULL)
1632 {
1633 FREE_DH (status);
1635 }
1637
1638
1639#undef __FUNC__
1640#define __FUNC__ "find_ordered_neighbors_private"
1641void
1643{
1645 int *hiNabors = s->hiNabors;
1646 int *allNabors = s->allNabors, allCount = s->allCount;
1647 int loCt = 0, hiCt = 0;
1648 int *o2n = s->o2n_sub;
1649 int i, myNewId = o2n[myid_dh];
1650
1651 for (i = 0; i < allCount; ++i)
1652 {
1653 int nabor = allNabors[i];
1654 if (o2n[nabor] < myNewId)
1655 {
1656 loNabors[loCt++] = nabor;
1657 }
1658 else
1659 {
1660 hiNabors[hiCt++] = nabor;
1661 }
1662 }
1663
1664 s->loCount = loCt;
1665 s->hiCount = hiCt;
1667
1668
1669#undef __FUNC__
1670#define __FUNC__ "color_subdomain_graph_private"
1671void
1673{
1674 START_FUNC_DH int i, n = np_dh;
1675 int *rp = s->ptrs, *cval = s->adj;
1676 int j, *marker, thisNodesColor, *colorCounter;
1677 int *o2n = s->o2n_sub;
1678 int *color = s->colorVec;
1679
1680 if (np_dh == 1)
1681 n = s->blocks;
1682
1683 marker = (int *) MALLOC_DH ((n + 1) * sizeof (int));
1684 colorCounter = (int *) MALLOC_DH ((n + 1) * sizeof (int));
1685 for (i = 0; i <= n; ++i)
1686 {
1687 marker[i] = -1;
1688 colorCounter[i] = 0;
1689 }
1690
1691 /*------------------------------------------------------------------
1692 * color the nodes
1693 *------------------------------------------------------------------*/
1694 for (i = 0; i < n; ++i)
1695 { /* color node "i" */
1696 /* mark colors of "i"s nabors as unavailable;
1697 only need to mark nabors that are (per the input ordering)
1698 numbered less than "i."
1699 */
1700 for (j = rp[i]; j < rp[i + 1]; ++j)
1701 {
1702 int nabor = cval[j];
1703 if (nabor < i)
1704 {
1705 int naborsColor = color[nabor];
1706 marker[naborsColor] = i;
1707 }
1708 }
1709
1710 /* assign vertex i the "smallest" possible color */
1711 thisNodesColor = -1;
1712 for (j = 0; j < n; ++j)
1713 {
1714 if (marker[j] != i)
1715 {
1716 thisNodesColor = j;
1717 break;
1718 }
1719 }
1720 color[i] = thisNodesColor;
1721 colorCounter[1 + thisNodesColor] += 1;
1722 }
1723
1724 /*------------------------------------------------------------------
1725 * build ordering vector; if two nodes are similarly colored,
1726 * they will have the same relative ordering as before.
1727 *------------------------------------------------------------------*/
1728 /* prefix-sum to find lowest-numbered node for each color */
1729 for (i = 1; i < n; ++i)
1730 {
1731 if (colorCounter[i] == 0)
1732 break;
1733 colorCounter[i] += colorCounter[i - 1];
1734 }
1735
1736 for (i = 0; i < n; ++i)
1737 {
1738 o2n[i] = colorCounter[color[i]];
1739 colorCounter[color[i]] += 1;
1740 }
1741
1742 /* invert permutation */
1743 invert_perm (n, s->o2n_sub, s->n2o_sub);
1745
1746
1747 /*------------------------------------------------------------------
1748 * count the number of colors used
1749 *------------------------------------------------------------------*/
1750 {
1751 int ct = 0;
1752 for (j = 0; j < n; ++j)
1753 {
1754 if (marker[j] == -1)
1755 break;
1756 ++ct;
1757 }
1758 s->colors = ct;
1759 }
1760
1761
1762 /*------------------------------------------------------------------
1763 * (re)build the beg_rowP array
1764 *------------------------------------------------------------------*/
1765 {
1766 int sum = 0;
1767 for (i = 0; i < n; ++i)
1768 {
1769 int old = s->n2o_sub[i];
1770 s->beg_rowP[old] = sum;
1771 sum += s->row_count[old];
1772 }
1773 }
1774
1775 FREE_DH (marker);
1777 FREE_DH (colorCounter);
1780
1781
1782#undef __FUNC__
1783#define __FUNC__ "SubdomainGraph_dhDump"
1784void
1786{
1787 START_FUNC_DH int i;
1788 int sCt = np_dh;
1789 FILE *fp;
1790
1791 if (np_dh == 1)
1792 sCt = s->blocks;
1793
1794
1795 /* ---------------------------------------------------------
1796 * for seq and par runs, 1st processor prints information
1797 * that is common to all processors
1798 * ---------------------------------------------------------*/
1799 fp = openFile_dh (filename, "w");
1801
1802 /* write subdomain ordering permutations */
1803 fprintf (fp, "----- colors used\n");
1804 fprintf (fp, "%i\n", s->colors);
1805 if (s->colorVec == NULL)
1806 {
1807 fprintf (fp, "s->colorVec == NULL\n");
1808 }
1809 else
1810 {
1811 fprintf (fp, "----- colorVec\n");
1812 for (i = 0; i < sCt; ++i)
1813 {
1814 fprintf (fp, "%i ", s->colorVec[i]);
1815 }
1816 fprintf (fp, "\n");
1817 }
1818
1819 if (s->o2n_sub == NULL || s->o2n_sub == NULL)
1820 {
1821 fprintf (fp, "s->o2n_sub == NULL || s->o2n_sub == NULL\n");
1822 }
1823 else
1824 {
1825 fprintf (fp, "----- o2n_sub\n");
1826 for (i = 0; i < sCt; ++i)
1827 {
1828 fprintf (fp, "%i ", s->o2n_sub[i]);
1829 }
1830 fprintf (fp, "\n");
1831 fprintf (fp, "----- n2o_sub\n");
1832 for (i = 0; i < sCt; ++i)
1833 {
1834 fprintf (fp, "%i ", s->n2o_sub[i]);
1835 }
1836 fprintf (fp, "\n");
1837 }
1838
1839 /* write begin row arrays */
1840 if (s->beg_row == NULL || s->beg_rowP == NULL)
1841 {
1842 fprintf (fp, "s->beg_row == NULL || s->beg_rowP == NULL\n");
1843 }
1844 else
1845 {
1846 fprintf (fp, "----- beg_row\n");
1847 for (i = 0; i < sCt; ++i)
1848 {
1849 fprintf (fp, "%i ", 1 + s->beg_row[i]);
1850 }
1851 fprintf (fp, "\n");
1852 fprintf (fp, "----- beg_rowP\n");
1853 for (i = 0; i < sCt; ++i)
1854 {
1855 fprintf (fp, "%i ", 1 + s->beg_rowP[i]);
1856 }
1857 fprintf (fp, "\n");
1858 }
1859
1860 /* write row count and bdry count arrays */
1861 if (s->row_count == NULL || s->bdry_count == NULL)
1862 {
1863 fprintf (fp, "s->row_count == NULL || s->bdry_count == NULL\n");
1864 }
1865 else
1866 {
1867 fprintf (fp, "----- row_count\n");
1868 for (i = 0; i < sCt; ++i)
1869 {
1870 fprintf (fp, "%i ", s->row_count[i]);
1871 }
1872 fprintf (fp, "\n");
1873 fprintf (fp, "----- bdry_count\n");
1874 for (i = 0; i < sCt; ++i)
1875 {
1876 fprintf (fp, "%i ", s->bdry_count[i]);
1877 }
1878 fprintf (fp, "\n");
1879
1880 }
1881
1882 /* write subdomain graph */
1883 if (s->ptrs == NULL || s->adj == NULL)
1884 {
1885 fprintf (fp, "s->ptrs == NULL || s->adj == NULL\n");
1886 }
1887 else
1888 {
1889 int j;
1890 int ct;
1891 fprintf (fp, "----- subdomain graph\n");
1892 for (i = 0; i < sCt; ++i)
1893 {
1894 fprintf (fp, "%i :: ", i);
1895 ct = s->ptrs[i + 1] - s->ptrs[i];
1896 if (ct)
1897 {
1898 shellSort_int (ct, s->adj + s->ptrs[i]);
1900 }
1901 for (j = s->ptrs[i]; j < s->ptrs[i + 1]; ++j)
1902 {
1903 fprintf (fp, "%i ", s->adj[j]);
1904 }
1905 fprintf (fp, "\n");
1906 }
1907 }
1908 closeFile_dh (fp);
1910
1911 /* ---------------------------------------------------------
1912 * next print info that differs across processors for par
1913 * trials. deal with this as two cases: seq and par
1914 * ---------------------------------------------------------*/
1915 if (s->beg_rowP == NULL)
1916 {
1917 SET_V_ERROR ("s->beg_rowP == NULL; can't continue");
1918 }
1919 if (s->row_count == NULL)
1920 {
1921 SET_V_ERROR ("s->row_count == NULL; can't continue");
1922 }
1923 if (s->o2n_sub == NULL)
1924 {
1925 SET_V_ERROR ("s->o2n_sub == NULL; can't continue");
1926 }
1927
1928
1929 if (np_dh == 1)
1930 {
1931 fp = openFile_dh (filename, "a");
1933
1934 /* write n2o_row and o2n_col */
1935 if (s->n2o_row == NULL || s->o2n_col == NULL)
1936 {
1937 fprintf (fp, "s->n2o_row == NULL|| s->o2n_col == NULL\n");
1938 }
1939 else
1940 {
1941 fprintf (fp, "----- n2o_row\n");
1942 for (i = 0; i < s->m; ++i)
1943 {
1944 fprintf (fp, "%i ", 1 + s->n2o_row[i]);
1945 }
1946 fprintf (fp, "\n");
1947
1948#if 0
1949/*
1950note: this won't match the parallel case, since
1951 parallel permutation vecs are zero-based and purely
1952 local
1953*/
1954
1955 fprintf (fp, "----- o2n_col\n");
1956 for (i = 0; i < sCt; ++i)
1957 {
1958 int br = s->beg_row[i];
1959 int er = br + s->row_count[i];
1960
1961 for (j = br; j < er; ++j)
1962 {
1963 fprintf (fp, "%i ", 1 + s->o2n_col[j]);
1964 }
1965 fprintf (fp, "\n");
1966 }
1967 fprintf (fp, "\n");
1968
1969#endif
1970
1971 }
1972 closeFile_dh (fp);
1974 }
1975
1976 /* parallel case */
1977 else
1978 {
1979 int id = s->n2o_sub[myid_dh];
1980 int m = s->m;
1981 int pe;
1982 int beg_row = 0;
1983 if (s->beg_row != 0)
1984 beg_row = s->beg_row[myid_dh];
1985
1986 /* write n2o_row */
1987 for (pe = 0; pe < np_dh; ++pe)
1988 {
1989 MPI_Barrier (comm_dh);
1990 if (id == pe)
1991 {
1992 fp = openFile_dh (filename, "a");
1994 if (id == 0)
1995 fprintf (fp, "----- n2o_row\n");
1996
1997 for (i = 0; i < m; ++i)
1998 {
1999 fprintf (fp, "%i ", 1 + s->n2o_row[i] + beg_row);
2000 }
2001 if (id == np_dh - 1)
2002 fprintf (fp, "\n");
2003 closeFile_dh (fp);
2005 }
2006 }
2007
2008#if 0
2009
2010 /* write o2n_col */
2011 for (pe = 0; pe < np_dh; ++pe)
2012 {
2013 MPI_Barrier (comm_dh);
2014 if (myid_dh == pe)
2015 {
2016 fp = openFile_dh (filename, "a");
2018 if (myid_dh == 0)
2019 fprintf (fp, "----- o2n_col\n");
2020
2021 for (i = 0; i < m; ++i)
2022 {
2023 fprintf (fp, "%i ", 1 + s->o2n_col[i] + beg_row);
2024 }
2025 fprintf (fp, "\n");
2026
2027 if (myid_dh == np_dh - 1)
2028 fprintf (fp, "\n");
2029
2030 closeFile_dh (fp);
2032 }
2033 }
2034
2035#endif
2036
2037 }
2038
2040
2041
2042
2043#undef __FUNC__
2044#define __FUNC__ "find_bdry_nodes_seq_private"
2045void
2047{
2048 START_FUNC_DH int i, j, row, blocks = s->blocks;
2049 int *cval, *tmp;
2050
2051 tmp = (int *) MALLOC_DH (m * sizeof (int));
2053 for (i = 0; i < m; ++i)
2054 tmp[i] = 0;
2055
2056 /*------------------------------------------
2057 * mark all boundary nodes
2058 *------------------------------------------ */
2059 for (i = 0; i < blocks; ++i)
2060 {
2061 int beg_row = s->beg_row[i];
2062 int end_row = beg_row + s->row_count[i];
2063
2064 for (row = beg_row; row < end_row; ++row)
2065 {
2066 bool isBdry = false;
2067 int len;
2068 EuclidGetRow (A, row, &len, &cval, NULL);
2070
2071 for (j = 0; j < len; ++j)
2072 { /* for each column in the row */
2073 int col = cval[j];
2074
2075 if (col < beg_row || col >= end_row)
2076 {
2077 tmp[col] = 1;
2078 isBdry = true;
2079 }
2080 }
2081 if (isBdry)
2082 tmp[row] = 1;
2083 EuclidRestoreRow (A, row, &len, &cval, NULL);
2085 }
2086 }
2087
2088 /*------------------------------------------
2089 * fill in the bdry_count[] array
2090 *------------------------------------------ */
2091 for (i = 0; i < blocks; ++i)
2092 {
2093 int beg_row = s->beg_row[i];
2094 int end_row = beg_row + s->row_count[i];
2095 int ct = 0;
2096 for (row = beg_row; row < end_row; ++row)
2097 {
2098 if (tmp[row])
2099 ++ct;
2100 }
2101 s->bdry_count[i] = ct;
2102 }
2103
2104 /*------------------------------------------
2105 * form the o2n_col[] permutation
2106 *------------------------------------------ */
2107 for (i = 0; i < blocks; ++i)
2108 {
2109 int beg_row = s->beg_row[i];
2110 int end_row = beg_row + s->row_count[i];
2111 int interiorIDX = beg_row;
2112 int bdryIDX = end_row - s->bdry_count[i];
2113
2114 for (row = beg_row; row < end_row; ++row)
2115 {
2116 if (tmp[row])
2117 {
2118 s->o2n_col[row] = bdryIDX++;
2119 }
2120 else
2121 {
2122 s->o2n_col[row] = interiorIDX++;
2123 }
2124 }
2125 }
2126
2127 /* invert permutation */
2128 invert_perm (m, s->o2n_col, s->n2o_row);
2130 FREE_DH (tmp);
2132
2134
2135#undef __FUNC__
2136#define __FUNC__ "SubdomainGraph_dhPrintSubdomainGraph"
2137void
2139{
2140 START_FUNC_DH if (myid_dh == 0)
2141 {
2142 int i, j;
2143
2144 fprintf (fp,
2145 "\n-----------------------------------------------------\n");
2146 fprintf (fp, "SubdomainGraph, and coloring and ordering information\n");
2147 fprintf (fp, "-----------------------------------------------------\n");
2148 fprintf (fp, "colors used: %i\n", s->colors);
2149
2150 fprintf (fp, "o2n ordering vector: ");
2151 for (i = 0; i < s->blocks; ++i)
2152 fprintf (fp, "%i ", s->o2n_sub[i]);
2153
2154 fprintf (fp, "\ncoloring vector (node, color): \n");
2155 for (i = 0; i < s->blocks; ++i)
2156 fprintf (fp, " %i, %i\n", i, s->colorVec[i]);
2157
2158 fprintf (fp, "\n");
2159 fprintf (fp, "Adjacency lists:\n");
2160
2161 for (i = 0; i < s->blocks; ++i)
2162 {
2163 fprintf (fp, " P_%i :: ", i);
2164 for (j = s->ptrs[i]; j < s->ptrs[i + 1]; ++j)
2165 {
2166 fprintf (fp, "%i ", s->adj[j]);
2167 }
2168 fprintf (fp, "\n");
2169 }
2170 fprintf (fp, "-----------------------------------------------------\n");
2171 }
2173
2174
2175#undef __FUNC__
2176#define __FUNC__ "adjust_matrix_perms_private"
2177void
2179{
2180 START_FUNC_DH int i, j, blocks = s->blocks;
2181 int *o2n = s->o2n_col;
2182
2183 for (i = 0; i < blocks; ++i)
2184 {
2185 int beg_row = s->beg_row[i];
2186 int end_row = beg_row + s->row_count[i];
2187 int adjust = s->beg_rowP[i] - s->beg_row[i];
2188 for (j = beg_row; j < end_row; ++j)
2189 o2n[j] += adjust;
2190 }
2191
2192 invert_perm (m, s->o2n_col, s->n2o_row);
2195
2196#undef __FUNC__
2197#define __FUNC__ "SubdomainGraph_dhPrintRatios"
2198void
2200{
2201 START_FUNC_DH int i;
2202 int blocks = np_dh;
2203 double ratio[25];
2204
2205 if (myid_dh == 0)
2206 {
2207 if (np_dh == 1)
2208 blocks = s->blocks;
2209 if (blocks > 25)
2210 blocks = 25;
2211
2212 fprintf (fp, "\n");
2213 fprintf (fp, "Subdomain interior/boundary node ratios\n");
2214 fprintf (fp, "---------------------------------------\n");
2215
2216 /* compute ratios */
2217 for (i = 0; i < blocks; ++i)
2218 {
2219 if (s->bdry_count[i] == 0)
2220 {
2221 ratio[i] = -1;
2222 }
2223 else
2224 {
2225 ratio[i] =
2226 (double) (s->row_count[i] -
2227 s->bdry_count[i]) / (double) s->bdry_count[i];
2228 }
2229 }
2230
2231 /* sort ratios */
2232 shellSort_float (blocks, ratio);
2233
2234 /* print ratios */
2235 if (blocks <= 20)
2236 { /* print all ratios */
2237 int j = 0;
2238 for (i = 0; i < blocks; ++i)
2239 {
2240 fprintf (fp, "%0.2g ", ratio[i]);
2241 ++j;
2242 if (j == 10)
2243 {
2244 fprintf (fp, "\n");
2245 }
2246 }
2247 fprintf (fp, "\n");
2248 }
2249 else
2250 { /* print 10 largest and 10 smallest ratios */
2251 fprintf (fp, "10 smallest ratios: ");
2252 for (i = 0; i < 10; ++i)
2253 {
2254 fprintf (fp, "%0.2g ", ratio[i]);
2255 }
2256 fprintf (fp, "\n");
2257 fprintf (fp, "10 largest ratios: ");
2258 {
2259 int start = blocks - 6, stop = blocks - 1;
2260 for (i = start; i < stop; ++i)
2261 {
2262 fprintf (fp, "%0.2g ", ratio[i]);
2263 }
2264 fprintf (fp, "\n");
2265 }
2266 }
2267 }
2268
2270
2271
2272#undef __FUNC__
2273#define __FUNC__ "SubdomainGraph_dhPrintStats"
2274void
2276{
2277 START_FUNC_DH double *timing = sg->timing;
2278
2279 fprintf_dh (fp, "\nSubdomainGraph timing report\n");
2280 fprintf_dh (fp, "-----------------------------\n");
2281 fprintf_dh (fp, "total setup time: %0.2f\n", timing[TOTAL_SGT]);
2282 fprintf_dh (fp, " find neighbors in subdomain graph: %0.2f\n",
2284 fprintf_dh (fp, " locally order interiors and bdry: %0.2f\n",
2286 fprintf_dh (fp, " form and color subdomain graph: %0.2f\n",
2288 fprintf_dh (fp, " exchange bdry permutations: %0.2f\n",
2290 fprintf_dh (fp, " everything else (should be small): %0.2f\n",
void Hash_i_dhCreate(Hash_i_dh *h, int sizeIN)
Definition Hash_i_dh.c:95
void Hash_i_dhInsert(Hash_i_dh h, int key, int dataIN)
Definition Hash_i_dh.c:206
void Hash_i_dhDestroy(Hash_i_dh h)
Definition Hash_i_dh.c:146
#define min(x, y)
bool Parser_dhHasSwitch(Parser_dh p, char *s)
Definition Parser_dh.c:213
void SortedSet_dhGetList(SortedSet_dh ss, int **list, int *count)
void SortedSet_dhDestroy(SortedSet_dh ss)
void SortedSet_dhInsert(SortedSet_dh ss, int idx)
void SortedSet_dhCreate(SortedSet_dh *ss, int size)
static void find_bdry_nodes_seq_private(SubdomainGraph_dh s, int m, void *A)
void SubdomainGraph_dhDestroy(SubdomainGraph_dh s)
void SubdomainGraph_dhInit(SubdomainGraph_dh s, int blocks, bool bj, void *A)
void SubdomainGraph_dhPrintSubdomainGraph(SubdomainGraph_dh s, FILE *fp)
void SubdomainGraph_dhCreate(SubdomainGraph_dh *s)
static void color_subdomain_graph_private(SubdomainGraph_dh s)
void SubdomainGraph_dhExchangePerms(SubdomainGraph_dh s)
static void find_bdry_nodes_unsym_private(SubdomainGraph_dh s, int m, void *A, int *interiorNodes, int *bdryNodes, int *interiorCount, int *bdryCount)
static void init_seq_private(SubdomainGraph_dh s, int blocks, bool bj, void *A)
static void find_bdry_nodes_sym_private(SubdomainGraph_dh s, int m, void *A, int *interiorNodes, int *bdryNodes, int *interiorCount, int *bdryCount)
static void init_mpi_private(SubdomainGraph_dh s, int blocks, bool bj, void *A)
void SubdomainGraph_dhPrintStatsLong(SubdomainGraph_dh s, FILE *fp)
static void find_all_neighbors_unsym_private(SubdomainGraph_dh s, int m, void *A)
#define BDRY_NODE_TAG
int SubdomainGraph_dhFindOwner(SubdomainGraph_dh s, int idx, bool permuted)
static void find_ordered_neighbors_private(SubdomainGraph_dh s)
static void find_all_neighbors_sym_private(SubdomainGraph_dh s, int m, void *A)
static void form_subdomaingraph_seq_private(SubdomainGraph_dh s, int m, void *A)
static void form_subdomaingraph_mpi_private(SubdomainGraph_dh s)
void SubdomainGraph_dhDump(SubdomainGraph_dh s, char *filename)
static void adjust_matrix_perms_private(SubdomainGraph_dh s, int m)
void SubdomainGraph_dhPrintRatios(SubdomainGraph_dh s, FILE *fp)
void SubdomainGraph_dhPrintStats(SubdomainGraph_dh sg, FILE *fp)
static void allocate_storage_private(SubdomainGraph_dh s, int blocks, int m, bool bj)
#define TIMING_BINS_SG
@ ORDER_BDRY_SGT
@ TOTAL_SGT
@ FORM_GRAPH_SGT
@ EXCHANGE_PERMS_SGT
@ FIND_NABORS_SGT
bool ignoreMe
int myid_dh
int np_dh
Parser_dh parser_dh
void printf_dh(char *fmt,...)
void fprintf_dh(FILE *fp, char *fmt,...)
char msgBuf_dh[MSG_BUF_SIZE_DH]
MPI_Comm comm_dh
FILE * logFile
#define MALLOC_DH(s)
#define FREE_DH(p)
void EuclidGetRow(void *A, int row, int *len, int **ind, double **val)
Definition getRow_dh.c:56
void EuclidRestoreRow(void *A, int row, int *len, int **ind, double **val)
Definition getRow_dh.c:80
void EuclidGetDimensions(void *A, int *beg_row, int *rowsLocal, int *rowsGlobal)
Definition getRow_dh.c:89
FILE * openFile_dh(const char *filenameIN, const char *modeIN)
Definition io_dh.c:54
void closeFile_dh(FILE *fpIN)
Definition io_dh.c:69
#define MIN(a, b)
Definition macros_dh.h:55
#define SET_V_ERROR(msg)
Definition macros_dh.h:126
#define START_FUNC_DH
Definition macros_dh.h:181
#define CHECK_V_ERROR
Definition macros_dh.h:138
#define SET_INFO(msg)
Definition macros_dh.h:156
#define END_FUNC_DH
Definition macros_dh.h:187
#define END_FUNC_VAL(retval)
Definition macros_dh.h:208
#define MAX(a, b)
Definition macros_dh.h:51
#define SET_ERROR(retval, msg)
Definition macros_dh.h:132
void invert_perm(int m, int *pIN, int *pOUT)
#define max(x, y)
Definition scscres.c:46
void shellSort_int(const int n, int *x)
void shellSort_float(const int n, double *x)
double timing[TIMING_BINS_SG]