140 std::map<std::pair<int,int>,
int> nodemap_;
147 const std::set<std::pair<int,int> > &indexset,
148 bool randomize =
false) {
151 std::vector<int> nodemap(indexset.size());
152 for (
int i=0; i<nodemap.size(); i++) nodemap[i] = i%nproc_;
154 std::shuffle(nodemap.begin(), nodemap.end(), std::default_random_engine(0));
157 for (std::set<std::pair<int,int> >::const_iterator i = indexset.begin();
160 nodemap_[*i] = nodemap[node++];
166 const std::map<
int,std::set<int> > &indexmap) {
170 for (std::map<
int,std::set<int> >::const_iterator i = indexmap.begin();
173 for (std::set<int>::const_iterator j = i->second.begin();
174 j != i->second.end();
176 nodemap_[std::make_pair(i->first,*j)] = node++%nproc_;
180 int pair_to_node(
int i,
int j)
const {
181 std::map<std::pair<int,int>,
int>::const_iterator
182 found = nodemap_.find(std::make_pair(i,j));
183 if (found == nodemap_.end()) {
184 throw std::runtime_error(
"PairMapping: requested non-existent block");
186 return found->second;
188 int me()
const {
return me_; }
189 int nproc()
const {
return nproc_; }
190 void local_pairs(std::set<std::pair<int,int> > &pairs)
const {
192 for (std::map<std::pair<int,int>,
int>::const_iterator i=nodemap_.begin();
195 if (i->second == me_) pairs.insert(i->first);
199 for (std::map<std::pair<int,int>,
int>::const_iterator i=nodemap_.begin();
203 <<
" " << i->first.second
204 <<
" -> " << i->second
363 bool clear_source_array,
364 bool ignore_block_distrib_throws)
367 int nproc = msg->n();
374 if (clear_source_array) d.
clear();
378 throw std::runtime_error(
"Array<N,C>::distributed_from_distributed requires MPI");
392 int *nblock_send =
new int[nproc];
393 int *nblock_recv =
new int[nproc];
394 for (
int i=0; i<nproc; i++) nblock_send[i] = 0;
395 for (
typename blockmap_t::const_iterator i = d.blocks_.begin();
396 i != d.blocks_.end();
399 if (ignore_block_distrib_throws) {
414 MPI_Alltoall(nblock_send, 1, MPI_INT,
415 nblock_recv, 1, MPI_INT,
430 int *block_send_offset =
new int[nproc];
431 int *block_recv_offset =
new int[nproc];
432 int *block_send_size =
new int[nproc];
433 int *block_recv_size =
new int[nproc];
434 for (
int i=0; i<nproc; i++) {
435 block_send_size[i] = N*nblock_send[i];
436 block_recv_size[i] = N*nblock_recv[i];
438 block_send_offset[i] = 0;
439 block_recv_offset[i] = 0;
442 block_send_offset[i] = block_send_size[i-1]
443 + block_send_offset[i-1];
444 block_recv_offset[i] = block_recv_size[i-1]
445 + block_recv_offset[i-1];
448 int block_send_size_total = block_send_size[nproc-1]
449 + block_send_offset[nproc-1];
450 int block_recv_size_total = block_recv_size[nproc-1]
451 + block_recv_offset[nproc-1];
456 int *block_send_buffers =
new int[block_send_size_total];
457 std::vector<int> current_send_offset(nproc);
458 std::fill(current_send_offset.begin(), current_send_offset.end(), 0);
459 for (
typename blockmap_t::const_iterator i = d.blocks_.begin();
460 i != d.blocks_.end();
463 if (ignore_block_distrib_throws) {
474 for (
int j=0; j<N; j++)
475 block_send_buffers[block_send_offset[node]
476 + current_send_offset[node]++]
481 int *block_recv_buffers =
new int[block_recv_size_total];
482 custom_alltoallv(block_send_buffers, block_send_size,
483 block_send_offset, MPI_INT,
484 block_recv_buffers, block_recv_size,
485 block_recv_offset, MPI_INT,
490 long ndata_recv_total = 0;
491 int *data_recv_offset =
new int[nproc];
492 int *data_recv_size =
new int[nproc];
493 for (
int i=0; i<nproc; i++) {
494 int nblock = nblock_recv[i];
496 for (
int j=0; j<nblock; j++) {
498 for (
int k=0; k<N; k++) {
499 bi.block(k) = block_recv_buffers[irecv++];
501 ndata += block_size(bi);
502 add_unallocated_block(bi);
504 ndata_recv_total += ndata;
505 data_recv_size[i] = ndata;
507 data_recv_offset[i] = 0;
509 data_recv_offset[i] = data_recv_offset[i-1] + data_recv_size[i-1];
515 int *data_send_offset =
new int[nproc];
516 int *data_send_size =
new int[nproc];
518 double *current_send_buffer = data_send_buffers;
519 for (
int i=0; i<nproc; i++) {
520 int nblock = nblock_send[i];
521 data_send_size[i] = 0;
522 for (
int j=0; j<nblock; j++) {
524 for (
int k=0; k<N; k++) {
525 bi.block(k) = block_send_buffers[isend++];
527 int ndata_in_block = block_size(bi);
528 memcpy(current_send_buffer, d.blocks_.find(bi)->second,
529 ndata_in_block*
sizeof(
double));
530 current_send_buffer += ndata_in_block;
531 data_send_size[i] += ndata_in_block;
534 data_send_offset[i] = 0;
536 data_send_offset[i] = data_send_offset[i-1] + data_send_size[i-1];
538 delete[] block_send_buffers;
539 delete[] block_send_offset;
540 delete[] block_send_size;
541 if (clear_source_array) d.
clear();
544 double *data_recv_buffers =
new double[ndata_recv_total];
545 custom_alltoallv(data_send_buffers, data_send_size,
546 data_send_offset, MPI_DOUBLE,
547 data_recv_buffers, data_recv_size,
548 data_recv_offset, MPI_DOUBLE,
550 delete[] data_send_buffers;
551 delete[] data_send_offset;
552 delete[] data_send_size;
557 double *current_recv_buffer = data_recv_buffers;
558 for (
int i=0; i<nproc; i++) {
559 int nblock = nblock_recv[i];
560 for (
int j=0; j<nblock; j++) {
562 for (
int k=0; k<N; k++) {
563 bi.block(k) = block_recv_buffers[irecv++];
565 typename blockmap_t::iterator iblock = blocks_.find(bi);
566 double *data = iblock->second;
567 int ndata_in_block = block_size(bi);
568 memcpy(data, current_recv_buffer, ndata_in_block*
sizeof(
double));
569 current_recv_buffer += ndata_in_block;
573 delete[] block_recv_buffers;
574 delete[] block_recv_offset;
575 delete[] block_recv_size;
577 delete[] data_recv_buffers;
578 delete[] data_recv_offset;
579 delete[] data_recv_size;
581 delete[] nblock_send;
582 delete[] nblock_recv;
591 std::vector<int> nblock(nproc);
592 std::fill(nblock.begin(), nblock.end(), 0);
593 for (
typename blockmap_t::const_iterator i = d.blocks_.begin();
594 i != d.blocks_.end();
601 std::vector<int*> buffer(nproc);
602 for (
int i=0; i<nproc; i++) {
604 buffer[i] =
new int[1 + N*nblock[i]];
605 buffer[i][0] = nblock[i];
614 std::vector<int> ndata(nproc);
615 std::fill(ndata.begin(), ndata.end(), 1);
616 for (
typename blockmap_t::const_iterator i = d.blocks_.begin();
617 i != d.blocks_.end();
620 int &counter = ndata[node];
621 for (
int j=0; j<N; j++) {
623 add_unallocated_block(i->first);
626 buffer[node][counter++] = i->first.block(j);
632 int max_nblock = *std::max_element(nblock.begin(), nblock.end());
633 msg->max(max_nblock);
634 int *remote_blocks =
new int[1 + max_nblock*N];
635 int source = (me + nproc - 1)%nproc;
636 for (
int next_node = (me+1)%nproc;
638 next_node = (next_node+1)%nproc) {
640 MPI_Request send_req, recv_req;
641 MPI_Irecv(remote_blocks,1+max_nblock*N,MPI_INT,source,
642 100+source,MPI_COMM_WORLD,&recv_req);
643 MPI_Isend(buffer[next_node],1+nblock[next_node]*N,MPI_INT,next_node,
644 100+me,MPI_COMM_WORLD,&send_req);
646 MPI_Wait(&recv_req,&status);
648 int nblock = remote_blocks[iarray++];
649 for (
int i=0; i<nblock; i++) {
651 for (
int j=0; j<N; j++) bi.block(j) = remote_blocks[iarray++];
652 add_unallocated_block(bi);
654 MPI_Wait(&send_req,&status);
655 source = (source + nproc - 1)%nproc;
657 throw std::runtime_error(
"Array<N,C>::distributed_from_distributed requires MPI");
660 delete[] remote_blocks;
661 for (
int i=0; i<nproc; i++)
delete[] buffer[i];
669 for (
typename blockmap_t::const_iterator i = d.blocks_.begin();
670 i != d.blocks_.end();
672 int sz = block_size(i->first);
673 if (sz > maxsz) maxsz = sz;
677 MPI_Pack_size(maxsz, MPI_DOUBLE, MPI_COMM_WORLD, &maxalloc);
678 for (
int i=0; i<N; i++) {
680 MPI_Pack_size(1, MPI_INT, MPI_COMM_WORLD, &isz);
683 char *outgoing_buffer =
new char[maxalloc];
688 int n_to_be_received = blocks_.size() - nblock[me];
690 const int nrecv_req = 20;
691 std::vector<char *> incoming_buffer(nrecv_req);
692 for (
int i=0; i<nrecv_req; i++) {
693 incoming_buffer[i] =
new char[maxalloc];
695 MPI_Request recv_req[nrecv_req];
696 for (
int i=0; i<nrecv_req; i++) {
697 if (i<n_to_be_received) {
698 MPI_Irecv(incoming_buffer[i], maxalloc, MPI_BYTE, MPI_ANY_SOURCE,
699 99, MPI_COMM_WORLD, &recv_req[i]);
702 recv_req[i] = MPI_REQUEST_NULL;
706 for (
typename blockmap_t::const_iterator i = d.blocks_.begin();
707 i != d.blocks_.end() || n_to_be_received > 0;) {
709 MPI_Request send_req;
715 bool need_wait_for_send =
false;
716 if (i != d.blocks_.end()) {
722 typename blockmap_t::iterator loc = blocks_.find(i->first);
723 memcpy(loc->second, i->second, ndat*
sizeof(
double));
730 int outgoing_loc = 0;
731 for (
int j=0; j<N; j++) {
732 int jval = i->first.block(j);
733 MPI_Pack(&jval, 1, MPI_INT, outgoing_buffer, maxalloc,
734 &outgoing_loc, MPI_COMM_WORLD);
736 MPI_Pack(i->second, ndat, MPI_DOUBLE, outgoing_buffer, maxalloc,
737 &outgoing_loc, MPI_COMM_WORLD);
738 MPI_Isend(outgoing_buffer,outgoing_loc,MPI_BYTE,node,
739 99,MPI_COMM_WORLD,&send_req);
740 need_wait_for_send =
true;
742 throw std::runtime_error(
"LMP2:: cannot send remote block w/o MPI");
750 for (
int irecv=0; irecv<nrecv_req; irecv++) {
751 if (recv_req[irecv] == MPI_REQUEST_NULL)
continue;
754 MPI_Test(&recv_req[irecv],&flag,&status);
760 for (
int j=0; j<N; j++) {
762 MPI_Unpack(incoming_buffer[irecv], maxalloc, &position, &index,
763 1, MPI_INT, MPI_COMM_WORLD);
766 typename blockmap_t::iterator loc = blocks_.find(bi);
767 int block_sz = block_size(bi);
768 MPI_Unpack(incoming_buffer[irecv], maxalloc, &position, loc->second,
769 block_sz, MPI_DOUBLE, MPI_COMM_WORLD);
772 if (n_to_be_received >= nrecv_req) {
773 MPI_Irecv(incoming_buffer[irecv], maxalloc, MPI_BYTE, MPI_ANY_SOURCE,
774 99, MPI_COMM_WORLD, &recv_req[irecv]);
780 if (need_wait_for_send) {
782 MPI_Wait(&send_req,&status);
787 for (
int i=0; i<nrecv_req; i++) {
788 delete[] incoming_buffer[i];
790 delete[] outgoing_buffer;
792 if (clear_source_array) d.
clear();