MPI_Scatter and Gather - 2D array, uneven blocks

骑士

I am working with MPI and I am trying to send uneven blocks of a 2D array to different processors. For example, I have a non-square image of size 333x225 and I want to send blocks of different sizes to different processors.

I have seen @Jonathan Dursi's approach for evenly sized arrays: sending blocks of 2D array in C using MPI

I tried to adapt it to my problem. So far I have managed to send even-sized chunks of data to two processes like this:

#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include "mpi.h"

int malloc2dchar(char ***array, int n, int m) {

    /* allocate the n*m contiguous items */
    char *p = (char *)malloc(n*m*sizeof(char));
    if (!p) return -1;

    /* allocate the row pointers into the memory */
    (*array) = (char **)malloc(n*sizeof(char*));
    if (!(*array)) {
       free(p);
       return -1;
    }

    /* set up the pointers into the contiguous memory */
    for (int i=0; i<n; i++)
       (*array)[i] = &(p[i*m]);

    return 0;
}

int free2dchar(char ***array) {
    /* free the memory - the first element of the array is at the start */
    free(&((*array)[0][0]));

    /* free the pointers into the memory */
    free(*array);

    return 0;
}

int main(int argc, char **argv) {
    char **global, **local;
    const int gridsize=10; // size of grid
    const int procgridsize=2;  // size of process grid
    int rank, size;        // rank of current process and no. of processes

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (rank == 0) {
        /* fill in the array, and print it */
        malloc2dchar(&global, gridsize, gridsize);
        for (int i=0; i<gridsize; i++) {
            for (int j=0; j<gridsize; j++)
                global[i][j] = '0'+(3*i+j)%10;
        }


        printf("Global array is:\n");
        for (int i=0; i<gridsize; i++) {
            for (int j=0; j<gridsize; j++)
                putchar(global[i][j]);

            printf("\n");
        }
    }

    /* create the local array which we'll process */

    malloc2dchar(&local, 5, 10);

    /* create a datatype to describe the subarrays of the global array */

    int sizes[2]    = {gridsize, gridsize};         /* global size */
    int subsizes[2] = {5, 10};     /* local size */
    int starts[2]   = {0,0};                        /* where this one starts */
    MPI_Datatype type, subarrtype;
    MPI_Type_create_subarray(2, sizes, subsizes, starts, MPI_ORDER_C, MPI_CHAR, &type);
    MPI_Type_create_resized(type, 0, 10*sizeof(char), &subarrtype);
    MPI_Type_commit(&subarrtype);

    char *globalptr=NULL;
    if (rank == 0) globalptr = &(global[0][0]);

    /* scatter the array to all processors */
    int sendcounts[2];
    int displs[2];

    if (rank == 0) {
        for (int i=0; i<2; i++) sendcounts[i] = 1;
        int disp = 0;
        displs[0]=0;
        displs[1]=5;

        //for (int i=0; i<procgridsize; i++) {
        //    for (int j=0; j<procgridsize; j++) {
        //        displs[i*procgridsize+j] = disp;
        //        disp += 1;
        //    }
        //    disp += ((gridsize/procgridsize)-1)*procgridsize;
        //}
    }


    MPI_Scatterv(globalptr, sendcounts, displs, subarrtype, &(local[0][0]),
                 gridsize*gridsize/2, MPI_CHAR,
                 0, MPI_COMM_WORLD);

    /* now all processors print their local data: */

    for (int p=0; p<size; p++) {
        if (rank == p) {
            printf("Local process on rank %d is:\n", rank);
            for (int i=0; i<5; i++) {
                putchar('|');
                for (int j=0; j<10; j++) {
                    putchar(local[i][j]);
                }
                printf("|\n");
            }
        }
        MPI_Barrier(MPI_COMM_WORLD);
    }

    /* now each processor has its local array, and can process it */
    for (int i=0; i<5; i++) {
        for (int j=0; j<10; j++) {
            local[i][j] = 'A' + rank;
        }
    }

    /* it all goes back to process 0 */
    MPI_Gatherv(&(local[0][0]), gridsize*gridsize/2,  MPI_CHAR,
                 globalptr, sendcounts, displs, subarrtype,
                 0, MPI_COMM_WORLD);

    /* don't need the local data anymore */
    free2dchar(&local);

    /* or the MPI data type */
    MPI_Type_free(&subarrtype);

    if (rank == 0) {
        printf("Processed grid:\n");
        for (int i=0; i<gridsize; i++) {
            for (int j=0; j<gridsize; j++) {
                putchar(global[i][j]);
            }
            printf("\n");
        }

        free2dchar(&global);
    }


    MPI_Finalize();

    return 0;
}

So I get:

Global array is:
0123456789
3456789012
6789012345
9012345678
2345678901
5678901234
8901234567
1234567890
4567890123
7890123456

Local process on rank 0 is:
|0123456789|
|3456789012|
|6789012345|
|9012345678|
|2345678901|

Local process on rank 1 is:
|5678901234|
|8901234567|
|1234567890|
|4567890123|
|7890123456|

Processed grid:
AAAAAAAAAA
AAAAAAAAAA
AAAAAAAAAA
AAAAAAAAAA
AAAAAAAAAA
BBBBBBBBBB
BBBBBBBBBB
BBBBBBBBBB
BBBBBBBBBB
BBBBBBBBBB

But I want the data to be like this (uneven chunks):

    AAAAAAAAAA
    AAAAAAAAAA
    AAAAAAAAAA
    AAAAAAAAAA
    AAAAAAAAAA
    AAAAAAAAAA
    BBBBBBBBBB
    BBBBBBBBBB
    BBBBBBBBBB
    BBBBBBBBBB

UPDATE

I have tried to set tab_size depending on the process rank, but it does not work completely.

Here is the code:

#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include "mpi.h"

int malloc2dchar(char ***array, int n, int m) {

    /* allocate the n*m contiguous items */
    char *p = (char *)malloc(n*m*sizeof(char));
    if (!p) return -1;

    /* allocate the row pointers into the memory */
    (*array) = (char **)malloc(n*sizeof(char*));
    if (!(*array)) {
       free(p);
       return -1;
    }

    /* set up the pointers into the contiguous memory */
    for (int i=0; i<n; i++)
       (*array)[i] = &(p[i*m]);

    return 0;
}

int free2dchar(char ***array) {
    /* free the memory - the first element of the array is at the start */
    free(&((*array)[0][0]));

    /* free the pointers into the memory */
    free(*array);

    return 0;
}

int main(int argc, char **argv) {
    char **global, **local;
    const int gridsize=10; // size of grid
    const int procgridsize=2;  // size of process grid
    int rank, size;        // rank of current process and no. of processes

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);


    //if (size != procgridsize*procgridsize) {
    //    fprintf(stderr,"%s: Only works with np=%d for now\n", argv[0], procgridsize);
    //    MPI_Abort(MPI_COMM_WORLD,1);
    //}

    int tab_size;
    if (rank == 0) {
        /* fill in the array, and print it */
        malloc2dchar(&global, gridsize, gridsize);
        for (int i=0; i<gridsize; i++) {
            for (int j=0; j<gridsize; j++)
                global[i][j] = '0'+(3*i+j)%10;
        }


        printf("Global array is:\n");
        for (int i=0; i<gridsize; i++) {
            for (int j=0; j<gridsize; j++)
                putchar(global[i][j]);

            printf("\n");
        }
        tab_size = 4;
    }
    if(rank == 1)
    {
        tab_size = 6;
    }

    /* create the local array which we'll process */

    malloc2dchar(&local, tab_size, 10);

    /* create a datatype to describe the subarrays of the global array */

    int sizes[2]    = {gridsize, gridsize};         /* global size */
    int subsizes[2] = {tab_size, 10};     /* local size */
    int starts[2]   = {0,0};                        /* where this one starts */
    MPI_Datatype type, subarrtype;
    MPI_Type_create_subarray(2, sizes, subsizes, starts, MPI_ORDER_C, MPI_CHAR, &type);
    MPI_Type_create_resized(type, 0, 10*sizeof(char), &subarrtype);
    MPI_Type_commit(&subarrtype);

    char *globalptr=NULL;
    if (rank == 0) globalptr = &(global[0][0]);

    /* scatter the array to all processors */
    int sendcounts[2];
    int displs[2];

    int tabsize;
    if (rank == 0) {
        for (int i=0; i<2; i++) sendcounts[i] = 1;
        int disp = 0;
        displs[0]=0;
        displs[1]=tab_size;

        //for (int i=0; i<procgridsize; i++) {
        //    for (int j=0; j<procgridsize; j++) {
        //        displs[i*procgridsize+j] = disp;
        //        disp += 1;
        //    }
        //    disp += ((gridsize/procgridsize)-1)*procgridsize;
        //}
    }


    MPI_Scatterv(globalptr, sendcounts, displs, subarrtype, &(local[0][0]),
                 gridsize*gridsize/2, MPI_CHAR,
                 0, MPI_COMM_WORLD);

    /* now all processors print their local data: */

    for (int p=0; p<size; p++) {
        if (rank == p) {
            printf("Local process on rank %d is:\n", rank);
            for (int i=0; i<tab_size; i++) {
                putchar('|');
                for (int j=0; j<10; j++) {
                    putchar(local[i][j]);
                }
                printf("|\n");
            }
        }
        MPI_Barrier(MPI_COMM_WORLD);
    }

    /* now each processor has its local array, and can process it */
    for (int i=0; i<tab_size; i++) {
        for (int j=0; j<10; j++) {
            local[i][j] = 'A' + rank;
        }
    }

    /* it all goes back to process 0 */
    MPI_Gatherv(&(local[0][0]), gridsize*gridsize/2,  MPI_CHAR,
                 globalptr, sendcounts, displs, subarrtype,
                 0, MPI_COMM_WORLD);

    /* don't need the local data anymore */
    free2dchar(&local);

    /* or the MPI data type */
    MPI_Type_free(&subarrtype);

    if (rank == 0) {
        printf("Processed grid:\n");
        for (int i=0; i<gridsize; i++) {
            for (int j=0; j<gridsize; j++) {
                putchar(global[i][j]);
            }
            printf("\n");
        }

        free2dchar(&global);
    }


    MPI_Finalize();

    return 0;
}

The output looks like this:

Global array is:
0123456789
3456789012
6789012345
9012345678
2345678901
5678901234
8901234567
1234567890
4567890123
7890123456
Local process on rank 0 is:
|0123456789|
|3456789012|
|6789012345|
|9012345678|
Local process on rank 1 is:
|2345678901|
|5678901234|
|8901234567|
|1234567890|
||
||
[blade001:3727] *** An error occurred in MPI_Gatherv
[blade001:3727] *** reported by process [2497249281,0]
[blade001:3727] *** on communicator MPI_COMM_WORLD
[blade001:3727] *** MPI_ERR_TRUNCATE: message truncated
[blade001:3727] *** MPI_ERRORS_ARE_FATAL (processes in this communicator will now abort,
[blade001:3727] ***    and potentially your MPI job)
Zulan

Why your code is wrong

The datatype you define is different on different ranks, and that is not how this is done: it has to be consistent. Concretely, the failure shows up in MPI_Gatherv: rank 1 sends gridsize*gridsize/2 = 50 chars, but the root's subarrtype was built with tab_size = 4 and therefore describes only 40 chars for that block, hence the MPI_ERR_TRUNCATE.

How to do what you are attempting correctly

Splitting contiguous data by whole rows, as you describe, is much simpler. There is no need for complex derived datatypes; in fact, you barely need them at all. A very simple datatype representing one row is enough. The only remaining task is to set up the sizes and displacements for MPI_Scatterv correctly:

/* number of rows owned by each rank: rank 0 gets 6 rows, rank 1 gets 4 */
int local_rows[2] = {6, 4};

malloc2dchar(&local, local_rows[rank], gridsize);

/* a datatype describing one full row of the global grid */
MPI_Datatype row_type;
MPI_Type_contiguous(gridsize, MPI_CHAR, &row_type);
MPI_Type_commit(&row_type);

int displs[2];

if (rank == 0) {
  /* displacement of each block in the global array, measured in rows */
  displs[0] = 0;
  for (int r = 1; r < 2; r++) {
    displs[r] = displs[r - 1] + local_rows[r - 1];
  }
}

MPI_Scatterv(globalptr, local_rows, displs, row_type, &(local[0][0]),
             local_rows[rank], row_type, 0, MPI_COMM_WORLD);

...

MPI_Gatherv(&(local[0][0]), local_rows[rank], row_type, globalptr, local_rows,
            displs, row_type, 0, MPI_COMM_WORLD);

This assumes that the sizes {6, 4} are known by all ranks. You can either have every rank compute them deterministically, or have only the root compute them and scatter them out (a non-root rank only needs to know its own number of rows).
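For instance, here is a minimal sketch (not from the original answer; rows_per_rank and my_rows are illustrative names, and rank, size, gridsize, local and malloc2dchar come from the code above) of letting only the root decide the split and scattering each rank its own row count:

int *rows_per_rank = NULL;   /* the full split; only allocated and filled on the root */
int my_rows;                 /* this rank's own number of rows */

if (rank == 0) {
    /* the root decides the (possibly uneven) split; here the {6, 4} split used above,
       assuming two ranks as in the snippet */
    rows_per_rank = (int *)malloc(size * sizeof(int));
    rows_per_rank[0] = 6;
    rows_per_rank[1] = 4;
}

/* every rank receives exactly one int: its own row count */
MPI_Scatter(rows_per_rank, 1, MPI_INT, &my_rows, 1, MPI_INT, 0, MPI_COMM_WORLD);

malloc2dchar(&local, my_rows, gridsize);

The root then uses rows_per_rank (and the displacements derived from it) as the counts for MPI_Scatterv/MPI_Gatherv, while every other rank only passes my_rows as its receive/send count.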

Truly irregular 2D decomposition

If you really want to split off chunks that do not consist only of whole rows, it becomes much more complicated. There is already a very good answer for that, so I won't repeat it here. Make sure to read it carefully and follow it precisely.

Given the complexity, I would suggest doing that only if you are absolutely sure you need it.

Overlap

You cannot send overlapping data with a single scatter. If you need overlap, consider having the neighboring processes exchange the boundary data of their own ranges directly with each other, i.e. a halo exchange; a rough sketch follows below.
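As an illustration only (this is not part of the original answer): for this row decomposition such a halo exchange could look like the sketch below, assuming each rank keeps its my_rows owned rows plus one ghost row above and below in a hypothetical buffer local_with_halo with my_rows + 2 rows of gridsize chars:

int up   = (rank > 0)        ? rank - 1 : MPI_PROC_NULL;  /* neighbour owning the rows above */
int down = (rank < size - 1) ? rank + 1 : MPI_PROC_NULL;  /* neighbour owning the rows below */

/* send my first owned row upwards, receive the lower neighbour's first row into my bottom ghost row */
MPI_Sendrecv(&local_with_halo[1][0],           gridsize, MPI_CHAR, up,   0,
             &local_with_halo[my_rows + 1][0], gridsize, MPI_CHAR, down, 0,
             MPI_COMM_WORLD, MPI_STATUS_IGNORE);

/* send my last owned row downwards, receive the upper neighbour's last row into my top ghost row */
MPI_Sendrecv(&local_with_halo[my_rows][0], gridsize, MPI_CHAR, down, 1,
             &local_with_halo[0][0],       gridsize, MPI_CHAR, up,   1,
             MPI_COMM_WORLD, MPI_STATUS_IGNORE);

Ranks at the top and bottom of the grid simply pass MPI_PROC_NULL, which turns the corresponding send or receive into a no-op.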
