原题目:
用MPI设计实现矩阵-向量乘法。进程0读取矩阵和向量,并将任务分配到各个进程,最后进程0打印出计算结果。在你的主目录下,保存的文件名为matvect.c。自定义矩阵和向量大小。
#include "mpi.h"
#include<stdio.h>
#include<stdlib.h>
int main()
{
int i,rank,size,m,n,index=0;
int *a,*matrix,*local_a,*disp,*recvcounts;
MPI_Comm mm = MPI_COMM_WORLD;
MPI_Status status;
MPI_Init(NULL,NULL);
MPI_Comm_size(MPI_COMM_WORLD,&size);
MPI_Comm_rank(MPI_COMM_WORLD,&rank);
disp = (int*)malloc(sizeof(int)*size);
recvcounts = (int*)malloc(sizeof(int)*size);
if(rank == 0)
{
printf("enter sizeof matrix(m & n):\n");//m行n列的矩阵
scanf("%d %d",&m,&n);
a = (int*)malloc(sizeof(int)*n);//向量n个数
matrix = (int*)malloc(sizeof(int)*m*n);//矩阵m*n个数
printf("enter vector:\n");
for(i=0;i<n;i++)
{
scanf("%d",&a[i]);//输入向量
}
printf("enter matrix:\n");
for(int i=0;i<m*n;i++)
{
scanf("%d",&matrix[i]);//输入矩阵
}
}
MPI_Bcast(&m,1,MPI_INT,0,mm);
MPI_Bcast(&n,1,MPI_INT,0,mm);
MPI_Bcast(a,n,MPI_INT,0,mm);//把a这个向量里面的值全部广播出去
for(i=0;i<size;i++)
{
recvcounts[i] = (m*n)/size;
if(i<(m*n)%size)
recvcounts[i]++;
disp[i] = (i>0?disp[i-1]+recvcounts[i-1]:0);
}
local_a = (int*)malloc(sizeof(int)*recvcounts[rank]);
MPI_Scatterv(matrix,recvcounts,disp,MPI_INT,local_a,recvcounts[rank],MPI_INT,0,mm);
index=disp[rank];
for(i=0;i<recvcounts[rank];i++)
{
local_a[i]*=a[(index+i)%n];
}
MPI_Gatherv(local_a,recvcounts[rank],MPI_INT,matrix,recvcounts,disp,MPI_INT,0,mm);
if(rank==0)
{
int sum=0;
for(i=0;i<m*n;i++)
{
sum+=matrix[i];
if(i%n==n-1)
{
printf("%d ",sum);
sum=0;
}
}
printf("\n");
}
MPI_Finalize();
}
程序运行:
enter sizeof matrix(m & n):
6 6
enter vector:
1 2 3 4 5 6
enter matrix:
1 2 3 4 5 6
1 2 3 4 5 6
1 2 3 4 5 6
1 2 3 4 5 6
1 2 3 4 5 6
1 2 3 4 5 6
91 91 91 91 91 91
root@cb81ea2c449b:/home/mpiuser# mpirun --allow-run-as-root -np 4 ./e11
enter sizeof matrix(m & n):
7 7
enter vector:
1 2 3 4 5 6 7
enter matrix:
1 2 3 4 5 6 7
1 2 3 4 5 6 7
1 2 3 4 5 6 7
1 2 3 4 5 6 7
1 2 3 4 5 6 7
1 2 3 4 5 6 7
1 2 3 4 5 6 7
140 140 140 140 140 140 140
[cb81ea2c449b:09524] *** Process received signal ***
[cb81ea2c449b:09524] Signal: Segmentation fault (11)
[cb81ea2c449b:09524] Signal code: Address not mapped (1)
[cb81ea2c449b:09524] Failing at address: 0x7ffd00000007
[cb81ea2c449b:09524] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x110c0)[0x7f935c64c0c0]
[cb81ea2c449b:09524] [ 1] /lib/x86_64-linux-gnu/libc.so.6(+0x120e6c)[0x7f935c3bce6c]
[cb81ea2c449b:09524] [ 2] /usr/local/lib/libopen-pal.so.40(opal_unsetenv+0x6c)[0x7f935bd3fccc]
[cb81ea2c449b:09524] [ 3] /usr/local/lib/openmpi/mca_schizo_orte.so(+0xab8)[0x7f935a7a5ab8]
[cb81ea2c449b:09524] [ 4] /usr/local/lib/libopen-rte.so.40(orte_schizo_base_finalize+0x2f)[0x7f935c065b5f]
[cb81ea2c449b:09524] [ 5] /usr/local/lib/libopen-rte.so.40(orte_finalize+0x95)[0x7f935bffbc85]
[cb81ea2c449b:09524] [ 6] /usr/local/lib/libmpi.so.40(ompi_mpi_finalize+0x872)[0x7f935c8a3562]
[cb81ea2c449b:09524] [ 7] [cb81ea2c449b:09521] *** Process received signal ***
[cb81ea2c449b:09521] Signal: Segmentation fault (11)
[cb81ea2c449b:09521] Signal code: Address not mapped (1)
[cb81ea2c449b:09521] Failing at address: 0x7ffc00000007
[cb81ea2c449b:09521] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x110c0)[0x7f7dbfe790c0]
[cb81ea2c449b:09521] [ 1] /lib/x86_64-linux-gnu/libc.so.6(+0x120e6c)[0x7f7dbfbe9e6c]
[cb81ea2c449b:09521] [ 2] /usr/local/lib/libopen-pal.so.40(opal_unsetenv+0x6c)[0x7f7dbf56cccc]
[cb81ea2c449b:09521] [ 3] /usr/local/lib/openmpi/mca_schizo_orte.so(+0xab8)[0x7f7dbdfd2ab8]
[cb81ea2c449b:09521] [ 4] /usr/local/lib/libopen-rte.so.40(orte_schizo_base_finalize+0x2f)[0x7f7dbf892b5f]
[cb81ea2c449b:09521] [ 5] /usr/local/lib/libopen-rte.so.40(orte_finalize+0x95)[0x7f7dbf828c85]
[cb81ea2c449b:09521] [ 6] /usr/local/lib/libmpi.so.40(ompi_mpi_finalize+0x872)[0x7f7dc00d0562]
[cb81ea2c449b:09521] [ 7] [cb81ea2c449b:09518] *** Process received signal ***
[cb81ea2c449b:09518] Signal: Segmentation fault (11)
[cb81ea2c449b:09518] Signal code: Address not mapped (1)
[cb81ea2c449b:09518] Failing at address: 0x7ffd00000007
[cb81ea2c449b:09518] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x110c0)[0x7f5c2c80c0c0]
[cb81ea2c449b:09518] [ 1] /lib/x86_64-linux-gnu/libc.so.6(+0x120e6c)[0x7f5c2c57ce6c]
[cb81ea2c449b:09518] [ 2] /usr/local/lib/libopen-pal.so.40(opal_unsetenv+0x6c)[0x7f5c2beffccc]
[cb81ea2c449b:09518] [ 3] /usr/local/lib/openmpi/mca_schizo_orte.so(+0xab8)[0x7f5c2a965ab8]
[cb81ea2c449b:09518] [ 4] /usr/local/lib/libopen-rte.so.40(orte_schizo_base_finalize+0x2f)[0x7f5c2c225b5f]
[cb81ea2c449b:09518] [ 5] /usr/local/lib/libopen-rte.so.40(orte_finalize+0x95)[0x7f5c2c1bbc85]
[cb81ea2c449b:09518] [ 6] /usr/local/lib/libmpi.so.40(ompi_mpi_finalize+0x872)[0x7f5c2ca63562]
[cb81ea2c449b:09518] [ 7] --------------------------------------------------------------------------
Primary job terminated normally, but 1 process returned
a non-zero exit code. Per user-direction, the job has been aborted.
--------------------------------------------------------------------------
--------------------------------------------------------------------------
mpirun noticed that process rank 1 with PID 0 on node c
为什么m和n值变到了7的时候就开始崩了,前面都可以正常的求矩阵和向量的乘积
不知道你这个问题是否已经解决, 如果还没有解决的话:1 matlab版本
2014a
2 参考文献
[1] 包子阳,余继周,杨杉.智能优化算法及其MATLAB实例(第2版)[M].电子工业出版社,2016.
[2]张岩,吴水根.MATLAB优化算法源代码[M].清华大学出版社,2017.
根据问题描述,我们需要使用MPI实现矩阵-向量乘法,并且在矩阵和向量大小为7时程序崩溃。因此,可能有以下问题:
1.程序存在缺陷,需要调试以找到哪个部分导致崩溃。 2.矩阵和向量大小是7时,内存不足以处理。
针对这两种情况,可以进行以下步骤:
1.检查程序代码以查找问题所在。主要是涉及矩阵分配和乘法过程的代码。可以添加调试语句和断点,也可以使用MPI_Debugger进行调试。
2.考虑优化内存使用。可以使用分块技术,将矩阵和向量分成小块进行计算,以减少内存使用。也可以在计算时,只使用一部分进程来处理矩阵乘法,从而减少内存使用。
下面是一个简单的MPI矩阵-向量乘法示例:
from mpi4py import MPI
import numpy as np
comm=MPI.COMM_WORLD
rank=comm.Get_rank()
size=comm.Get_size()
N=7
a=np.zeros(N,dtype='f')
b=np.zeros((N,N),dtype='f')
#初始化数组
if rank==0:
a[:]=np.random.rand(N)
b[:]=np.random.rand(N,N)
#广播数组
a=comm.bcast(a,root=0)
b=comm.bcast(b,root=0)
#分块矩阵乘法
chunk_size=N//size
chunk_a=np.zeros(chunk_size,dtype='f')
chunk_b=np.zeros((chunk_size,N),dtype='f')
chunk_c=np.zeros(chunk_size,dtype='f')
comm.Scatter(a,chunk_a,root=0)
comm.Scatter(b,chunk_b,root=0)
for i in range(chunk_size):
for j in range(N):
chunk_c[i]+=chunk_a[j]*chunk_b[i,j]
out_data=np.zeros(N,dtype='f')
comm.Gather(chunk_c,out_data,root=0)
if rank==0:
c=np.dot(a,b)
if np.allclose(c,out_data):
print("Results match")
else:
print("Results do not match")
这个代码使用scatter(散射)和gather(收集)操作来拆分矩阵和向量,然后在每个进程内进行计算。最后,将结果收集在进程0中,并进行比较。这可以避免内存溢出,因为每个进程只是处理其中一部分数据。