OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
pigemr.c File Reference
#include "redist.h"
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>

Go to the source code of this file.

Data Structures

struct  MDESC
struct  IDESC

Macros

#define static2   static
#define fortran_mr2d   pigemr2do
#define fortran_mr2dnew   pigemr2d
#define icopy_   icopy
#define ilacpy_   ilacpy
#define Clacpy   Cigelacpy
#define BLOCK_CYCLIC_2D   1
#define SHIFT(row, sprow, nbrow)
#define max(A, B)
#define min(A, B)
#define DIVUP(a, b)
#define ROUNDUP(a, b)
#define scanD0   igescanD0
#define dispmat   igedispmat
#define setmemory   igesetmemory
#define freememory   igefreememory
#define scan_intervals   igescan_intervals
#define SENDBUFF   0
#define RECVBUFF   1
#define SIZEBUFF   2
#define NDEBUG
#define DESCLEN   9
#define NBPARAM
#define MAGIC_MAX   100000000
#define Mlacpy(mo, no, ao, ldao, bo, ldbo)

Functions

void Cblacs_pcoord ()
Int Cblacs_pnum ()
void Csetpvmtids ()
void Cblacs_get ()
void Cblacs_pinfo ()
void Cblacs_gridinfo ()
void Cblacs_gridinit ()
void Cblacs_exit ()
void Cblacs_gridexit ()
void Cblacs_setup ()
void Cigebs2d ()
void Cigebr2d ()
void Cigesd2d ()
void Cigerv2d ()
void Cigsum2d ()
void Cigamn2d ()
void Cigamx2d ()
Int localindice ()
void * mr2d_malloc ()
Int ppcm ()
Int localsize ()
Int memoryblocksize ()
Int changeorigin ()
void paramcheck ()
void Cpigemr2do ()
void Cpigemr2d ()
void fortran_mr2d (Int *m, Int *n, Int *A, Int *ia, Int *ja, Int desc_A[DESCLEN], Int *B, Int *ib, Int *jb, Int desc_B[DESCLEN])
void fortran_mr2dnew (Int *m, Int *n, Int *A, Int *ia, Int *ja, Int desc_A[DESCLEN], Int *B, Int *ib, Int *jb, Int desc_B[DESCLEN], Int *gcontext)
static2 void init_chenille ()
static2 Int inter_len ()
static2 Int block2buff ()
static2 void buff2block ()
static2 void gridreshape ()
void Cpigemr2do (Int m, Int n, Int *ptrmyblock, Int ia, Int ja, MDESC *ma, Int *ptrmynewblock, Int ib, Int jb, MDESC *mb)
void Cpigemr2d (Int m, Int n, Int *ptrmyblock, Int ia, Int ja, MDESC *ma, Int *ptrmynewblock, Int ib, Int jb, MDESC *mb, Int globcontext)
static2 void init_chenille (Int mypnum, Int nprocs, Int n0, Int *proc0, Int n1, Int *proc1, Int **psend, Int **precv, Int *myrang)
static2 Int block2buff (IDESC *vi, Int vinb, IDESC *hi, Int hinb, Int *ptra, MDESC *ma, Int *buff)
static2 void buff2block (IDESC *vi, Int vinb, IDESC *hi, Int hinb, Int *buff, Int *ptrb, MDESC *mb)
static2 Int inter_len (Int hinb, IDESC *hi, Int vinb, IDESC *vi)
void Clacpy (Int m, Int n, Int *a, Int lda, Int *b, Int ldb)
static2 void gridreshape (Int *ctxtp)

Macro Definition Documentation

◆ BLOCK_CYCLIC_2D

#define BLOCK_CYCLIC_2D   1

Definition at line 171 of file pigemr.c.

◆ Clacpy

#define Clacpy   Cigelacpy

Definition at line 158 of file pigemr.c.

◆ DESCLEN

#define DESCLEN   9

Definition at line 242 of file pigemr.c.

◆ dispmat

#define dispmat   igedispmat

Definition at line 218 of file pigemr.c.

◆ DIVUP

#define DIVUP ( a,
b )
Value:
( ((a)-1) /(b)+1)

Definition at line 179 of file pigemr.c.

◆ fortran_mr2d

#define fortran_mr2d   pigemr2do

Definition at line 153 of file pigemr.c.

◆ fortran_mr2dnew

#define fortran_mr2dnew   pigemr2d

Definition at line 154 of file pigemr.c.

◆ freememory

#define freememory   igefreememory

Definition at line 220 of file pigemr.c.

◆ icopy_

#define icopy_   icopy

Definition at line 155 of file pigemr.c.

◆ ilacpy_

#define ilacpy_   ilacpy

Definition at line 156 of file pigemr.c.

◆ MAGIC_MAX

#define MAGIC_MAX   100000000

Definition at line 286 of file pigemr.c.

◆ max

#define max ( A,
B )
Value:
((A)>(B)?(A):(B))

Definition at line 177 of file pigemr.c.

◆ min

#define min ( A,
B )
Value:
((A)>(B)?(B):(A))

Definition at line 178 of file pigemr.c.

◆ Mlacpy

#define Mlacpy ( mo,
no,
ao,
ldao,
bo,
ldbo )
Value:
{ \
Int *_a,*_b; \
Int _m,_n,_lda,_ldb; \
Int _i,_j; \
_m = (mo);_n = (no); \
_a = (ao);_b = (bo); \
_lda = (ldao) - _m; \
_ldb = (ldbo) - _m; \
assert(_lda >= 0 && _ldb >= 0); \
for (_j=0;_j<_n;_j++) { \
for (_i=0;_i<_m;_i++) \
*_b++ = *_a++; \
_b += _ldb; \
_a += _lda; \
} \
}
#define Int
Definition Bconfig.h:22

Definition at line 617 of file pigemr.c.

617}
618#define Mlacpy(mo,no,ao,ldao,bo,ldbo) \
619{ \
620Int *_a,*_b; \
621Int _m,_n,_lda,_ldb; \
622 Int _i,_j; \
623 _m = (mo);_n = (no); \
624 _a = (ao);_b = (bo); \
625 _lda = (ldao) - _m; \
626 _ldb = (ldbo) - _m; \
627 assert(_lda >= 0 && _ldb >= 0); \
628 for (_j=0;_j<_n;_j++) { \
629 for (_i=0;_i<_m;_i++) \
630 *_b++ = *_a++; \
631 _b += _ldb; \
632 _a += _lda; \
633 } \

◆ NBPARAM

#define NBPARAM
Value:
20 /* p0,q0,p1,q1, puis ma,na,mba,nba,rowa,cola puis
* idem B puis ia,ja puis ib,jb */

Definition at line 285 of file pigemr.c.

◆ NDEBUG

#define NDEBUG

Definition at line 237 of file pigemr.c.

◆ RECVBUFF

#define RECVBUFF   1

Definition at line 231 of file pigemr.c.

◆ ROUNDUP

#define ROUNDUP ( a,
b )
Value:
(DIVUP(a,b)*(b))
#define DIVUP(a, b)
Definition pcgemr.c:182

Definition at line 180 of file pigemr.c.

◆ scan_intervals

#define scan_intervals   igescan_intervals

Definition at line 221 of file pigemr.c.

◆ scanD0

#define scanD0   igescanD0

Definition at line 217 of file pigemr.c.

◆ SENDBUFF

#define SENDBUFF   0

Definition at line 230 of file pigemr.c.

◆ setmemory

#define setmemory   igesetmemory

Definition at line 219 of file pigemr.c.

◆ SHIFT

#define SHIFT ( row,
sprow,
nbrow )
Value:
((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow)))

Definition at line 176 of file pigemr.c.

◆ SIZEBUFF

#define SIZEBUFF   2

Definition at line 232 of file pigemr.c.

◆ static2

#define static2   static

Id
pigemr.c,v 1.1.1.1 2000/02/15 18:04:08 susan Exp

– ScaLAPACK routine (version 1.7) – Oak Ridge National Laboratory, Univ. of Tennessee, and Univ. of California, Berkeley. October 31, 1994.

SUBROUTINE PIGEMR2D( M, N, $ A, IA, JA, ADESC, $ B, IB, JB, BDESC,

$ CTXT)

Purpose

PIGEMR2D copies a submatrix of A on a submatrix of B. A and B can have different distributions: they can be on different processor grids, they can have different blocksizes, the beginning of the area to be copied can be at a different places on A and B.

The parameters can be confusing when the grids of A and B are partially or completly disjoint, in the case a processor calls this routines but is either not in the A context or B context, the ADESC[CTXT] or BDESC[CTXT] must be equal to -1, to ensure the routine recognise this situation. To summarize the rule:

  • If a processor is in A context, all parameters related to A must be valid.
  • If a processor is in B context, all parameters related to B must be valid.
  • ADESC[CTXT] and BDESC[CTXT] must be either valid contexts or equal to -1.
  • M and N must be valid for everyone.
  • other parameters are not examined.

Notes

A description vector is associated with each 2D block-cyclicly dis- tributed matrix. This vector stores the information required to establish the mapping between a matrix entry and its corresponding process and memory location.

In the following comments, the character _ should be read as "of the distributed matrix". Let A be a generic term for any 2D block cyclicly distributed matrix. Its description vector is DESC_A:

NOTATION STORED IN EXPLANATION


DT_A (global) DESCA( DT_ ) The descriptor type. CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating the BLACS process grid A is distribu- ted over. The context itself is glo- bal, but the handle (the integer value) may vary. M_A (global) DESCA( M_ ) The number of rows in the distributed matrix A. N_A (global) DESCA( N_ ) The number of columns in the distri- buted matrix A. MB_A (global) DESCA( MB_ ) The blocking factor used to distribute the rows of A. NB_A (global) DESCA( NB_ ) The blocking factor used to distribute the columns of A. RSRC_A (global) DESCA( RSRC_ ) The process row over which the first row of the matrix A is distributed. CSRC_A (global) DESCA( CSRC_ ) The process column over which the first column of A is distributed. LLD_A (local) DESCA( LLD_ ) The leading dimension of the local array storing the local blocks of the distributed matrix A. LLD_A >= MAX(1,LOCp(M_A)).

Important notice

The parameters of the routine have changed in April 1996 There is a new last argument. It must be a context englobing all processors involved in the initial and final distribution.

Be aware that all processors included in this context must call the redistribution routine.

Parameters

M (input) INTEGER. On entry, M specifies the number of rows of the submatrix to be copied. M must be at least zero. Unchanged on exit.

N (input) INTEGER. On entry, N specifies the number of cols of the submatrix to be redistributed.rows of B. M must be at least zero. Unchanged on exit.

A (input) INTEGER On entry, the source matrix. Unchanged on exit.

IA,JA (input) INTEGER On entry,the coordinates of the beginning of the submatrix of A to copy. 1 <= IA <= M_A - M + 1,1 <= JA <= N_A - N + 1, Unchanged on exit.

ADESC (input) A description vector (see Notes above) If the current processor is not part of the context of A the ADESC[CTXT] must be equal to -1.

B (output) INTEGER On entry, the destination matrix. The portion corresponding to the defined submatrix are updated.

IB,JB (input) INTEGER On entry,the coordinates of the beginning of the submatrix of B that will be updated. 1 <= IB <= M_B - M + 1,1 <= JB <= N_B - N + 1, Unchanged on exit.

BDESC (input) B description vector (see Notes above) For processors not part of the context of B BDESC[CTXT] must be equal to -1.

CTXT (input) a context englobing at least all processors included in either A context or B context

Memory requirement :

for the processors belonging to grid 0, one buffer of size block 0 and for the processors belonging to grid 1, also one buffer of size block 1.


Created March 1993 by B. Tourancheau (See sccs for modifications).

Modifications by Loic PRYLLI 1995

Definition at line 143 of file pigemr.c.

Function Documentation

◆ block2buff() [1/2]

static2 Int block2buff ( )

◆ block2buff() [2/2]

static2 Int block2buff ( IDESC * vi,
Int vinb,
IDESC * hi,
Int hinb,
Int * ptra,
MDESC * ma,
Int * buff )

Definition at line 635 of file pigemr.c.

637{
638 Int h, v, sizebuff;
639 Int *ptr2;
640 sizebuff = 0;
641 for (h = 0; h < hinb; h++) {
642 ptr2 = ptra + hi[h].lstart * ma->lda;
643 for (v = 0; v < vinb; v++) {
644 Mlacpy(vi[v].len, hi[h].len,
645 ptr2 + vi[v].lstart,
646 ma->lda,
647 buff + sizebuff, vi[v].len);
648 sizebuff += hi[h].len * vi[v].len;
649 }
650 }
651 return sizebuff;
#define Mlacpy(mo, no, ao, ldao, bo, ldbo)
Definition pigemr.c:617
Int lstart
Definition pcgemr.c:176
Int len
Definition pcgemr.c:177
Int lda
Definition pcgemr.c:172

◆ buff2block() [1/2]

static2 void buff2block ( )

◆ buff2block() [2/2]

static2 void buff2block ( IDESC * vi,
Int vinb,
IDESC * hi,
Int hinb,
Int * buff,
Int * ptrb,
MDESC * mb )

Definition at line 653 of file pigemr.c.

655{
656 Int h, v, sizebuff;
657 Int *ptr2;
658 sizebuff = 0;
659 for (h = 0; h < hinb; h++) {
660 ptr2 = ptrb + hi[h].lstart * mb->lda;
661 for (v = 0; v < vinb; v++) {
662 Mlacpy(vi[v].len, hi[h].len,
663 buff + sizebuff, vi[v].len,
664 ptr2 + vi[v].lstart,
665 mb->lda);
666 sizebuff += hi[h].len * vi[v].len;
667 }
668 }

◆ Cblacs_exit()

void Cblacs_exit ( )
extern

◆ Cblacs_get()

void Cblacs_get ( )
extern

◆ Cblacs_gridexit()

void Cblacs_gridexit ( )
extern

◆ Cblacs_gridinfo()

void Cblacs_gridinfo ( )
extern

◆ Cblacs_gridinit()

void Cblacs_gridinit ( )
extern

◆ Cblacs_pcoord()

void Cblacs_pcoord ( )
extern

◆ Cblacs_pinfo()

void Cblacs_pinfo ( )
extern

◆ Cblacs_pnum()

Int Cblacs_pnum ( )
extern

◆ Cblacs_setup()

void Cblacs_setup ( )
extern

◆ changeorigin()

Int changeorigin ( )
extern

◆ Cigamn2d()

void Cigamn2d ( )
extern

◆ Cigamx2d()

void Cigamx2d ( )
extern

◆ Cigebr2d()

void Cigebr2d ( )
extern

◆ Cigebs2d()

void Cigebs2d ( )
extern

◆ Cigerv2d()

void Cigerv2d ( )
extern

◆ Cigesd2d()

void Cigesd2d ( )
extern

◆ Cigsum2d()

void Cigsum2d ( )
extern

◆ Clacpy()

void Clacpy ( Int m,
Int n,
Int * a,
Int lda,
Int * b,
Int ldb )

Definition at line 682 of file pigemr.c.

684{
685 Int i, j;
686 lda -= m;
687 ldb -= m;
688 assert(lda >= 0 && ldb >= 0);
689 for (j = 0; j < n; j++) {
690 for (i = 0; i < m; i++)
691 *b++ = *a++;
692 b += ldb;
693 a += lda;
694 }
n

◆ Cpigemr2d() [1/2]

void Cpigemr2d ( )
extern

◆ Cpigemr2d() [2/2]

void Cpigemr2d ( Int m,
Int n,
Int * ptrmyblock,
Int ia,
Int ja,
MDESC * ma,
Int * ptrmynewblock,
Int ib,
Int jb,
MDESC * mb,
Int globcontext )

Definition at line 288 of file pigemr.c.

297{
298 Int *ptrsendbuff, *ptrrecvbuff, *ptrNULL = 0;
299 Int *recvptr;
300 MDESC newa, newb;
301 Int *proc0, *proc1, *param;
302 Int mypnum, myprow0, mypcol0, myprow1, mypcol1, nprocs;
303 Int i, j;
304 Int nprow, npcol, gcontext;
305 Int recvsize, sendsize;
306 IDESC *h_inter; /* to store the horizontal intersections */
307 IDESC *v_inter; /* to store the vertical intersections */
308 Int hinter_nb, vinter_nb; /* number of intrsections in both directions */
309 Int dummy;
310 Int p0, q0, p1, q1;
311 Int *ra, *ca;
312 /* end of variables */
313 /* To simplify further calcul we change the matrix indexation from
314 * 1..m,1..n (fortran) to 0..m-1,0..n-1 */
315 if (m == 0 || n == 0)
316 return;
317 ia -= 1;
318 ja -= 1;
319 ib -= 1;
320 jb -= 1;
321 Cblacs_gridinfo(globcontext, &nprow, &npcol, &dummy, &mypnum);
322 gcontext = globcontext;
323 nprocs = nprow * npcol;
324 /* if the global context that is given to us has not the shape of a line
325 * (nprow != 1), create a new context. TODO: to be optimal, we should
326 * avoid this because it is an uncessary synchronisation */
327 if (nprow != 1) {
328 gridreshape(&gcontext);
329 Cblacs_gridinfo(gcontext, &dummy, &dummy, &dummy, &mypnum);
330 }
331 Cblacs_gridinfo(ma->ctxt, &p0, &q0, &myprow0, &mypcol0);
332 /* compatibility T3D, must check myprow and mypcol are within bounds */
333 if (myprow0 >= p0 || mypcol0 >= q0)
334 myprow0 = mypcol0 = -1;
335 assert((myprow0 < p0 && mypcol0 < q0) || (myprow0 == -1 && mypcol0 == -1));
336 Cblacs_gridinfo(mb->ctxt, &p1, &q1, &myprow1, &mypcol1);
337 if (myprow1 >= p1 || mypcol1 >= q1)
338 myprow1 = mypcol1 = -1;
339 assert((myprow1 < p1 && mypcol1 < q1) || (myprow1 == -1 && mypcol1 == -1));
340 /* exchange the missing parameters among the processors: shape of grids and
341 * location of the processors */
342 param = (Int *) mr2d_malloc(3 * (nprocs * 2 + NBPARAM) * sizeof(Int));
343 ra = param + nprocs * 2 + NBPARAM;
344 ca = param + (nprocs * 2 + NBPARAM) * 2;
345 for (i = 0; i < nprocs * 2 + NBPARAM; i++)
346 param[i] = MAGIC_MAX;
347 proc0 = param + NBPARAM;
348 proc1 = param + NBPARAM + nprocs;
349 /* we calulate proc0 and proc1 that will give the number of a proc in
350 * respectively a or b in the global context */
351 if (myprow0 >= 0) {
352 proc0[myprow0 * q0 + mypcol0] = mypnum;
353 param[0] = p0;
354 param[1] = q0;
355 param[4] = ma->m;
356 param[5] = ma->n;
357 param[6] = ma->nbrow;
358 param[7] = ma->nbcol;
359 param[8] = ma->sprow;
360 param[9] = ma->spcol;
361 param[10] = ia;
362 param[11] = ja;
363 }
364 if (myprow1 >= 0) {
365 proc1[myprow1 * q1 + mypcol1] = mypnum;
366 param[2] = p1;
367 param[3] = q1;
368 param[12] = mb->m;
369 param[13] = mb->n;
370 param[14] = mb->nbrow;
371 param[15] = mb->nbcol;
372 param[16] = mb->sprow;
373 param[17] = mb->spcol;
374 param[18] = ib;
375 param[19] = jb;
376 }
377 Cigamn2d(gcontext, "All", "H", 2 * nprocs + NBPARAM, (Int)1, param, (Int)2 * nprocs + NBPARAM,
378 ra, ca, 2 * nprocs + NBPARAM, (Int)-1, (Int)-1);
379 newa = *ma;
380 newb = *mb;
381 ma = &newa;
382 mb = &newb;
383 if (myprow0 == -1) {
384 p0 = param[0];
385 q0 = param[1];
386 ma->m = param[4];
387 ma->n = param[5];
388 ma->nbrow = param[6];
389 ma->nbcol = param[7];
390 ma->sprow = param[8];
391 ma->spcol = param[9];
392 ia = param[10];
393 ja = param[11];
394 }
395 if (myprow1 == -1) {
396 p1 = param[2];
397 q1 = param[3];
398 mb->m = param[12];
399 mb->n = param[13];
400 mb->nbrow = param[14];
401 mb->nbcol = param[15];
402 mb->sprow = param[16];
403 mb->spcol = param[17];
404 ib = param[18];
405 jb = param[19];
406 }
407 for (i = 0; i < NBPARAM; i++) {
408 if (param[i] == MAGIC_MAX) {
409 fprintf(stderr, "xxGEMR2D:something wrong in the parameters\n");
410 exit(1);
411 }
412 }
413#ifndef NDEBUG
414 for (i = 0; i < p0 * q0; i++)
415 assert(proc0[i] >= 0 && proc0[i] < nprocs);
416 for (i = 0; i < p1 * q1; i++)
417 assert(proc1[i] >= 0 && proc1[i] < nprocs);
418#endif
419 /* check the validity of the parameters */
420 paramcheck(ma, ia, ja, m, n, p0, q0, gcontext);
421 paramcheck(mb, ib, jb, m, n, p1, q1, gcontext);
422 /* we change the problem so that ia < a->nbrow ... andia + m = a->m ... */
423 {
424 Int decal;
425 ia = changeorigin(myprow0, ma->sprow, p0,
426 ma->nbrow, ia, &decal, &ma->sprow);
427 ptrmyblock += decal;
428 ja = changeorigin(mypcol0, ma->spcol, q0,
429 ma->nbcol, ja, &decal, &ma->spcol);
430 ptrmyblock += decal * ma->lda;
431 ma->m = ia + m;
432 ma->n = ja + n;
433 ib = changeorigin(myprow1, mb->sprow, p1,
434 mb->nbrow, ib, &decal, &mb->sprow);
435 ptrmynewblock += decal;
436 jb = changeorigin(mypcol1, mb->spcol, q1,
437 mb->nbcol, jb, &decal, &mb->spcol);
438 ptrmynewblock += decal * mb->lda;
439 mb->m = ib + m;
440 mb->n = jb + n;
441 if (p0 == 1)
442 ma->nbrow = ma->m;
443 if (q0 == 1)
444 ma->nbcol = ma->n;
445 if (p1 == 1)
446 mb->nbrow = mb->m;
447 if (q1 == 1)
448 mb->nbcol = mb->n;
449#ifndef NDEBUG
450 paramcheck(ma, ia, ja, m, n, p0, q0, gcontext);
451 paramcheck(mb, ib, jb, m, n, p1, q1, gcontext);
452#endif
453 }
454 /* We compute the size of the memory buffer ( we choose the worst case,
455 * when the buffer sizes == the memory block sizes). */
456 if (myprow0 >= 0 && mypcol0 >= 0) {
457 /* Initialize pointer variables */
458 setmemory(&ptrsendbuff, memoryblocksize(ma));
459 }; /* if (mypnum < p0 * q0) */
460 if (myprow1 >= 0 && mypcol1 >= 0) {
461 /* Initialize pointer variables */
462 setmemory(&ptrrecvbuff, memoryblocksize(mb));
463 }; /* if (mypnum < p1 * q1) */
464 /* allocing room for the tabs, alloc for the worst case,local_n or local_m
465 * intervals, in fact the worst case should be less, perhaps half that,I
466 * should think of that one day. */
467 h_inter = (IDESC *) mr2d_malloc(DIVUP(ma->n, q0 * ma->nbcol) *
468 ma->nbcol * sizeof(IDESC));
469 v_inter = (IDESC *) mr2d_malloc(DIVUP(ma->m, p0 * ma->nbrow)
470 * ma->nbrow * sizeof(IDESC));
471 /* We go for the scanning of indices. For each processor including mypnum,
472 * we fill the sendbuff buffer (scanD0(SENDBUFF)) and when it is done send
473 * it. Then for each processor, we compute the size of message to be
474 * receive scanD0(SIZEBUFF)), post a receive and then allocate the elements
475 * of recvbuff the right place (scanD)(RECVBUFF)) */
476 recvptr = ptrrecvbuff;
477 {
478 Int tot, myrang, step, sens;
479 Int *sender, *recver;
480 Int mesending, merecving;
481 tot = max(p0 * q0, p1 * q1);
482 init_chenille(mypnum, nprocs, p0 * q0, proc0, p1 * q1, proc1,
483 &sender, &recver, &myrang);
484 if (myrang == -1)
485 goto after_comm;
486 mesending = myprow0 >= 0;
487 assert(sender[myrang] >= 0 || !mesending);
488 assert(!mesending || proc0[sender[myrang]] == mypnum);
489 merecving = myprow1 >= 0;
490 assert(recver[myrang] >= 0 || !merecving);
491 assert(!merecving || proc1[recver[myrang]] == mypnum);
492 step = tot - 1 - myrang;
493 do {
494 for (sens = 0; sens < 2; sens++) {
495 /* be careful here, when we communicating with ourselves, we must
496 * send first (myrang > step == 0) */
497 if (mesending && recver[step] >= 0 &&
498 (sens == 0)) {
499 i = recver[step] / q1;
500 j = recver[step] % q1;
501 vinter_nb = scan_intervals('r', ia, ib, m, ma, mb, p0, p1, myprow0, i,
502 v_inter);
503 hinter_nb = scan_intervals('c', ja, jb, n, ma, mb, q0, q1, mypcol0, j,
504 h_inter);
505 sendsize = block2buff(v_inter, vinter_nb, h_inter, hinter_nb,
506 ptrmyblock, ma, ptrsendbuff);
507 } /* if (mesending...) { */
508 if (mesending && recver[step] >= 0 &&
509 (sens == myrang > step)) {
510 i = recver[step] / q1;
511 j = recver[step] % q1;
512 if (sendsize > 0
513 && (step != myrang || !merecving)
514 ) {
515 Cigesd2d(gcontext, sendsize, (Int)1, ptrsendbuff, sendsize,
516 (Int)0, proc1[i * q1 + j]);
517 } /* sendsize > 0 */
518 } /* if (mesending ... */
519 if (merecving && sender[step] >= 0 &&
520 (sens == myrang <= step)) {
521 i = sender[step] / q0;
522 j = sender[step] % q0;
523 vinter_nb = scan_intervals('r', ib, ia, m, mb, ma, p1, p0, myprow1, i,
524 v_inter);
525 hinter_nb = scan_intervals('c', jb, ja, n, mb, ma, q1, q0, mypcol1, j,
526 h_inter);
527 recvsize = inter_len(hinter_nb, h_inter, vinter_nb, v_inter);
528 if (recvsize > 0) {
529 if (step == myrang && mesending) {
530 Clacpy(recvsize, (Int)1,
531 ptrsendbuff, recvsize,
532 ptrrecvbuff, recvsize);
533 } else {
534 Cigerv2d(gcontext, recvsize, (Int)1, ptrrecvbuff, recvsize,
535 (Int)0, proc0[i * q0 + j]);
536 }
537 } /* recvsize > 0 */
538 } /* if (merecving ...) */
539 if (merecving && sender[step] >= 0 && sens == 1) {
540 buff2block(v_inter, vinter_nb, h_inter, hinter_nb,
541 recvptr, ptrmynewblock, mb);
542 } /* if (merecving...) */
543 } /* for (sens = 0) */
544 step -= 1;
545 if (step < 0)
546 step = tot - 1;
547 } while (step != tot - 1 - myrang);
548after_comm:
549 free(sender);
550 } /* { int tot,nr,ns ...} */
551 /* don't forget to clean up things! */
552 if (myprow1 >= 0 && mypcol1 >= 0) {
553 freememory((char *) ptrrecvbuff);
554 };
555 if (myprow0 >= 0 && mypcol0 >= 0) {
556 freememory((char *) ptrsendbuff);
557 };
558 if (nprow != 1)
559 Cblacs_gridexit(gcontext);
560 free(v_inter);
561 free(h_inter);
562 free(param);
integer, save, private nprocs
Definition cmumps_load.F:57
#define NBPARAM
Definition pcgemr.c:288
#define MAGIC_MAX
Definition pcgemr.c:289
static2 Int inter_len()
Int memoryblocksize()
Int changeorigin()
#define freememory
Definition pigemr.c:220
#define scan_intervals
Definition pigemr.c:221
void Cblacs_gridexit()
#define max(A, B)
Definition pigemr.c:177
static2 void gridreshape()
#define DIVUP(a, b)
Definition pigemr.c:179
#define Clacpy
Definition pigemr.c:158
void Cigamn2d()
#define setmemory
Definition pigemr.c:219
void Cigerv2d()
static2 void buff2block()
void paramcheck()
static2 Int block2buff()
void Cblacs_gridinfo()
void Cigesd2d()
void * mr2d_malloc()
static2 void init_chenille()
Int m
Definition pcgemr.c:166
Int spcol
Definition pcgemr.c:171
Int nbcol
Definition pcgemr.c:169
Int sprow
Definition pcgemr.c:170
Int nbrow
Definition pcgemr.c:168
Int ctxt
Definition pcgemr.c:165
Int n
Definition pcgemr.c:167

◆ Cpigemr2do() [1/2]

void Cpigemr2do ( )
extern

◆ Cpigemr2do() [2/2]

void Cpigemr2do ( Int m,
Int n,
Int * ptrmyblock,
Int ia,
Int ja,
MDESC * ma,
Int * ptrmynewblock,
Int ib,
Int jb,
MDESC * mb )

Definition at line 265 of file pigemr.c.

273{
274 Int dummy, nprocs;
275 Int gcontext;
276 /* first we initialize a global grid which serve as a reference to
277 * communicate from grid a to grid b */
278 Cblacs_pinfo(&dummy, &nprocs);
279 Cblacs_get((Int)0, (Int)0, &gcontext);
280 Cblacs_gridinit(&gcontext, "R", (Int)1, nprocs);
281 Cpigemr2d(m, n, ptrmyblock, ia, ja, ma,
282 ptrmynewblock, ib, jb, mb, gcontext);
283 Cblacs_gridexit(gcontext);
284}
void Cpigemr2d()
void Cblacs_pinfo()
void Cblacs_get()
void Cblacs_gridinit()

◆ Csetpvmtids()

void Csetpvmtids ( )
extern

◆ fortran_mr2d()

void fortran_mr2d ( Int * m,
Int * n,
Int * A,
Int * ia,
Int * ja,
Int desc_A[DESCLEN],
Int * B,
Int * ib,
Int * jb,
Int desc_B[DESCLEN] )

Definition at line 244 of file pigemr.c.

246{
247 Cpigemr2do(*m, *n, A, *ia, *ja, (MDESC *) desc_A,
248 B, *ib, *jb, (MDESC *) desc_B);
249 return;
250}
void Cpigemr2do()

◆ fortran_mr2dnew()

void fortran_mr2dnew ( Int * m,
Int * n,
Int * A,
Int * ia,
Int * ja,
Int desc_A[DESCLEN],
Int * B,
Int * ib,
Int * jb,
Int desc_B[DESCLEN],
Int * gcontext )

Definition at line 252 of file pigemr.c.

254{
255 Cpigemr2d(*m, *n, A, *ia, *ja, (MDESC *) desc_A,
256 B, *ib, *jb, (MDESC *) desc_B, *gcontext);
257 return;
258}

◆ gridreshape() [1/2]

static2 void gridreshape ( )

◆ gridreshape() [2/2]

static2 void gridreshape ( Int * ctxtp)

Definition at line 696 of file pigemr.c.

698{
699 Int ori, final; /* original context, and new context created, with
700 * line form */
701 Int nprow, npcol, myrow, mycol;
702 Int *usermap;
703 Int i, j;
704 ori = *ctxtp;
705 Cblacs_gridinfo(ori, &nprow, &npcol, &myrow, &mycol);
706 usermap = mr2d_malloc(sizeof(Int) * nprow * npcol);
707 for (i = 0; i < nprow; i++)
708 for (j = 0; j < npcol; j++) {
709 usermap[i + j * nprow] = Cblacs_pnum(ori, i, j);
710 }
711 /* Cblacs_get(0, 0, &final); */
712 Cblacs_get(ori, (Int)10, &final);
713 Cblacs_gridmap(&final, usermap, (Int)1, (Int)1, nprow * npcol);
714 *ctxtp = final;
715 free(usermap);
void Cblacs_gridmap()
Int Cblacs_pnum()

◆ init_chenille() [1/2]

static2 void init_chenille ( )

◆ init_chenille() [2/2]

static2 void init_chenille ( Int mypnum,
Int nprocs,
Int n0,
Int * proc0,
Int n1,
Int * proc1,
Int ** psend,
Int ** precv,
Int * myrang )

Definition at line 564 of file pigemr.c.

566{
567 Int ns, nr, i, tot;
568 Int *sender, *recver, *g0, *g1;
569 tot = max(n0, n1);
570 sender = (Int *) mr2d_malloc((nprocs + tot) * sizeof(Int) * 2);
571 recver = sender + tot;
572 *psend = sender;
573 *precv = recver;
574 g0 = recver + tot;
575 g1 = g0 + nprocs;
576 for (i = 0; i < nprocs; i++) {
577 g0[i] = -1;
578 g1[i] = -1;
579 }
580 for (i = 0; i < tot; i++) {
581 sender[i] = -1;
582 recver[i] = -1;
583 }
584 for (i = 0; i < n0; i++)
585 g0[proc0[i]] = i;
586 for (i = 0; i < n1; i++)
587 g1[proc1[i]] = i;
588 ns = 0;
589 nr = 0;
590 *myrang = -1;
591 for (i = 0; i < nprocs; i++)
592 if (g0[i] >= 0 && g1[i] >= 0) {
593 if (i == mypnum)
594 *myrang = nr;
595 sender[ns] = g0[i];
596 ns += 1;
597 recver[nr] = g1[i];
598 nr += 1;
599 assert(ns <= n0 && nr <= n1 && nr == ns);
600 }
601 for (i = 0; i < nprocs; i++)
602 if (g0[i] >= 0 && g1[i] < 0) {
603 if (i == mypnum)
604 *myrang = ns;
605 sender[ns] = g0[i];
606 ns += 1;
607 assert(ns <= n0);
608 }
609 for (i = 0; i < nprocs; i++)
610 if (g1[i] >= 0 && g0[i] < 0) {
611 if (i == mypnum)
612 *myrang = nr;
613 recver[nr] = g1[i];
614 nr += 1;
615 assert(nr <= n1);
616 }

◆ inter_len() [1/2]

static2 Int inter_len ( )

◆ inter_len() [2/2]

static2 Int inter_len ( Int hinb,
IDESC * hi,
Int vinb,
IDESC * vi )

Definition at line 670 of file pigemr.c.

672{
673 Int hlen, vlen, h, v;
674 hlen = 0;
675 for (h = 0; h < hinb; h++)
676 hlen += hi[h].len;
677 vlen = 0;
678 for (v = 0; v < vinb; v++)
679 vlen += vi[v].len;
680 return hlen * vlen;

◆ localindice()

Int localindice ( )
extern

◆ localsize()

Int localsize ( )
extern

◆ memoryblocksize()

Int memoryblocksize ( )
extern

◆ mr2d_malloc()

void * mr2d_malloc ( )
extern

◆ paramcheck()

void paramcheck ( )
extern

◆ ppcm()

Int ppcm ( )
extern