MPQC 3.0.0-alpha
parallel.h
1
2/*
3 * Copyright 2009 Sandia Corporation. Under the terms of Contract
4 * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government
5 * retains certain rights in this software.
6 *
7 * This file is a part of the MPQC LMP2 library.
8 *
9 * The MPQC LMP2 library is free software: you can redistribute it
10 * and/or modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation, either
12 * version 3 of the License, or (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful, but
15 * WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this program. If not, see
21 * <http://www.gnu.org/licenses/>.
22 *
23 */
24
25#ifndef _chemistry_qc_lmp2_parallel_h
26#define _chemistry_qc_lmp2_parallel_h
27
28#ifdef HAVE_MPI
29#define OMPI_SKIP_MPICXX
30#define MPICH_SKIP_MPICXX
31#include <mpi.h>
32#endif
33
34#include <memory>
35#include <algorithm>
36#include <random>
37#include <mpqc_config.h>
38#include <util/misc/scint.h>
39#include <util/misc/regtime.h>
40
41#include <chemistry/qc/lmp2/collective.h>
42
43#define NEW_DIST_DATA
44
45namespace sc {
46
47namespace sma2 {
48
49template <int N>
50void
51Array<N>::parallel_accumulate(const sc::Ref<sc::MessageGrp> &grp) {
52 sc::Timer tim;
53 tim.enter("accum");
54 //tim_enter("sum");
55 //grp->sum(data_->data(), data_->ndata());
56 //tim_exit("sum");
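 // Without MPI, sum all contributions onto node 0 and broadcast the
 // result; with MPI, the allsum collective performs both steps at once.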
57#ifndef HAVE_MPI
58 tim.enter("sum0");
59 grp->sum(data_->data(), data_->ndata(), 0, 0);
60 tim.exit("sum0");
61 tim.enter("bcast");
62 grp->bcast(data_->data(), data_->ndata());
63 tim.exit("bcast");
64#else
65 allsum(data_->data(), data_->ndata());
66#endif
67 tim.enter("bounds");
68 compute_bounds();
69 tim.exit("bounds");
70 tim.exit("accum");
71}
72
73template <int N>
74void
75Array<N>::parallel_union(const sc::Ref<sc::MessageGrp> &msg)
76{
77 sc::Timer tim;
78 int me = msg->me();
79 int nproc = msg->n();
80
81 // fetch the block map entries from all the other nodes
82 tim.enter("parallel_union");
83 int nblock_local = n_block();
84 int nblock_max = n_block();
85 msg->max(nblock_max);
86 int *local_blocks = new int[1 + nblock_local*N];
87 int *remote_blocks = new int[1 + nblock_max*N];
88 int iarray = 0;
89 local_blocks[iarray++] = nblock_local;
90 for (typename blockmap_t::iterator i = blocks_.begin();
91 i != blocks_.end();
92 i++) {
93 for (int j=0; j<N; j++) {
94 local_blocks[iarray++] = i->first.block(j);
95 }
96 }
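 // Ring exchange: in nproc-1 rounds this node sends its own block list to
 // one other node and receives another node's list, adding the received
 // blocks as unallocated blocks. Afterwards every node holds the union of
 // all nodes' block maps.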
97 int source = (me + nproc - 1)%nproc; // me - 1 mod nproc
98 for (int next_node = (me+1)%nproc;
99 next_node != me;
100 next_node = (next_node+1)%nproc) {
101#ifdef HAVE_MPI
102 MPI_Request send_req, recv_req;
103 MPI_Irecv(remote_blocks,1+nblock_max*N,MPI_INT,source,
104 100+source,MPI_COMM_WORLD,&recv_req);
105 MPI_Isend(local_blocks,1+nblock_local*N,MPI_INT,next_node,
106 100+me,MPI_COMM_WORLD,&send_req);
107 MPI_Status status;
108 MPI_Wait(&recv_req,&status); // wait for the recv to complete
109 int iarray = 0;
110 int nblock = remote_blocks[iarray++];
111 for (int i=0; i<nblock; i++) {
112 BlockInfo<N> bi;
113 for (int j=0; j<N; j++) bi.block(j) = remote_blocks[iarray++];
114 add_unallocated_block(bi);
115 }
116 MPI_Wait(&send_req,&status); // wait for the send to complete
117 source = (source + nproc - 1)%nproc; // source - 1 mod nproc
118#else
119 throw std::runtime_error("Array<N,C>::replicated_from_distributed requires MPI");
120#endif
121 }
122 delete[] local_blocks;
123 delete[] remote_blocks;
124 tim.exit("parallel_union");
125}
126
129template <int N>
130class BlockDistrib {
131 public:
133 virtual int block_to_node(const BlockInfo<N> &b) const = 0;
134 virtual ~BlockDistrib() {}
135};
136
137/** Distributes pairs of indices among the
138    processes. */
139class PairMapping: public sc::RefCount {
140 std::map<std::pair<int,int>,int> nodemap_;
141 int nproc_;
142 int me_;
143 public:
144 // Distributes two indices among the nodes. The passed
145 // indexset must be identical on all nodes.
146 PairMapping(const sc::Ref<sc::MessageGrp> &grp,
147 const std::set<std::pair<int,int> > &indexset,
148 bool randomize = false) {
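 // Assign the pairs to nodes round-robin in the set's iteration order.
 // The fixed shuffle seed keeps the optional randomization identical on
 // every node, so the resulting map is consistent everywhere.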
149 nproc_ = grp->n();
150 me_= grp->me();
151 std::vector<int> nodemap(indexset.size());
152 for (size_t i=0; i<nodemap.size(); i++) nodemap[i] = i%nproc_;
153 if (randomize) {
154 std::shuffle(nodemap.begin(), nodemap.end(), std::default_random_engine(0));
155 }
156 int node=0;
157 for (std::set<std::pair<int,int> >::const_iterator i = indexset.begin();
158 i != indexset.end();
159 i++) {
160 nodemap_[*i] = nodemap[node++];
161 }
162 }
163 // Distributes indices i0 and i1 among the nodes. The passed
164 // indexmap is identical on all nodes.
165 PairMapping(const sc::Ref<sc::MessageGrp> &grp,
166 const std::map<int,std::set<int> > &indexmap) {
167 nproc_ = grp->n();
168 me_= grp->me();
169 int node=0;
170 for (std::map<int,std::set<int> >::const_iterator i = indexmap.begin();
171 i != indexmap.end();
172 i++) {
173 for (std::set<int>::const_iterator j = i->second.begin();
174 j != i->second.end();
175 j++) {
176 nodemap_[std::make_pair(i->first,*j)] = node++%nproc_;
177 }
178 }
179 }
180 int pair_to_node(int i, int j) const {
181 std::map<std::pair<int,int>,int>::const_iterator
182 found = nodemap_.find(std::make_pair(i,j));
183 if (found == nodemap_.end()) {
184 throw std::runtime_error("PairMapping: requested non-existent block");
185 }
186 return found->second;
187 }
188 int me() const { return me_; }
189 int nproc() const { return nproc_; }
190 void local_pairs(std::set<std::pair<int,int> > &pairs) const {
191 pairs.clear();
192 for (std::map<std::pair<int,int>,int>::const_iterator i=nodemap_.begin();
193 i != nodemap_.end();
194 i++) {
195 if (i->second == me_) pairs.insert(i->first);
196 }
197 }
198 void print() const {
199 for (std::map<std::pair<int,int>,int>::const_iterator i=nodemap_.begin();
200 i != nodemap_.end();
201 i++) {
202 sc::ExEnv::out0() << i->first.first
203 << " " << i->first.second
204 << " -> " << i->second
205 << std::endl;
206 }
207 }
208};
209
210/** An implementation of BlockDistrib using
211    PairMapping. */
212template <int N>
213class PairBlockDistrib: public BlockDistrib<N> {
214 int i0_, i1_;
215 sc::Ref<PairMapping> mapping_;
216 public:
217 PairBlockDistrib(const sc::Ref<sc::MessageGrp> &grp,
218 int i0, int i1,
219 const std::set<std::pair<int,int> > &indexset,
220 bool randomize = false) {
221 i0_ = i0; i1_ = i1;
222 mapping_ = new PairMapping(grp,indexset,randomize);
223 }
224 PairBlockDistrib(const sc::Ref<sc::MessageGrp> &grp,
225 int i0, int i1,
226 const std::map<int,std::set<int> > &indexmap) {
227 i0_ = i0; i1_ = i1;
228 mapping_ = new PairMapping(grp,indexmap);
229 }
230 // Distributes indices i0 and i1 among the nodes.
231 // The PairMapping object describes the distribution.
232 PairBlockDistrib(int i0, int i1,
233 const sc::Ref<PairMapping> &mapping) {
234 i0_ = i0; i1_ = i1;
235 mapping_ = mapping;
236 }
237 int block_to_node(const BlockInfo<N> &b) const {
238 return mapping_->pair_to_node(b.block(i0_),b.block(i1_));
239 }
240 void local_pairs(std::set<std::pair<int,int> > &pairs) const {
241 mapping_->local_pairs(pairs);
242 }
243};
244
245/** Distribute blocks round-robin among
246    processes using one or more index
247    values. */
248template <int N>
249class CompleteBlockDistrib: public BlockDistrib<N> {
250 int nindex_;
251 int indices_[N];
252 sc::sc_uint64_t size_[N];
253 int nindex_i_[N];
254 int nproc_;
255 int me_;
256 void init_sizes(const Array<N> &a) {
257 size_[0] = 1;
258 for (int i=1; i<N; i++) {
259 size_[i] = size_[i-1] * a.index(i).nindex();
260 }
261 for (int i=0; i<N; i++) {
262 nindex_i_[i] = a.index(i).nindex();
263 }
264 }
265 public:
266 CompleteBlockDistrib(const Array<N> &a, const sc::Ref<sc::MessageGrp> &grp,
267 int i0): nindex_(1) {
268 me_ = grp->me();
269 indices_[0]=i0;
270 nproc_ = grp->n();
271 init_sizes(a);
272 }
273 CompleteBlockDistrib(const Array<N> &a, const sc::Ref<sc::MessageGrp> &grp,
274 int i0, int i1): nindex_(2) {
275 me_ = grp->me();
276 indices_[0]=i0; indices_[1]=i1;
277 nproc_ = grp->n();
278 init_sizes(a);
279 }
280 CompleteBlockDistrib(const Array<N> &a, const sc::Ref<sc::MessageGrp> &grp,
281 int i0, int i1, int i2): nindex_(3) {
282 me_ = grp->me();
283 indices_[0]=i0; indices_[1]=i1; indices_[2]=i2;
284 nproc_ = grp->n();
285 init_sizes(a);
286 }
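 // Linearize the selected block indices into a single offset using the
 // precomputed strides in size_[], then take it modulo nproc_. For a
 // single index this reduces to block % nproc_, i.e. a cyclic distribution.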
287 int block_to_node(const BlockInfo<N> &b) const {
288 sc::sc_uint64_t loc = 0;
289 for (int i=0; i<nindex_; i++) {
290 loc += size_[i] * b.block(indices_[i]);
291 }
292 return loc%nproc_;
293 }
294 // Only valid for a single index!
295 int nlocalindex(int node = -1) {
296 if (node == -1) node = me_;
297 int r = nindex_i_[0] / nproc_;
298 if (node < nindex_i_[0] % nproc_) r++;
299 return r;
300 }
301 // Only valid for a single index!
302 int localindex(int i, int node = -1) {
303 if (node == -1) node = me_;
304 return node + nproc_*i;
305 }
306};
307
308template <int N>
309void
310Array<N>::replicated_from_distributed(const sc::Ref<sc::MessageGrp> &msg,
311 const Array<N> &d)
312{
313 sc::Timer tim;
314 tim.enter("repl_from_dist");
315
316 int me = msg->me();
317 int nproc = msg->n();
318// if (nproc == 1) {
319// operator = (d);
320// return;
321// }
322
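 // Replication proceeds in four steps: create this node's copy of d's
 // block map, merge the block maps from all nodes (parallel_union),
 // copy the block data held locally, and sum the contributions across
 // nodes (parallel_accumulate) so every node ends up with the full array.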
323 // initialize the indices, etc, in the array
324 // and copy the blockmap entries from the local node
325 init_blocks(d, 0.0); // using tol=0.0 causes all blocks to be used
326#ifdef USE_BOUND
327 bound_ = d.bound_;
328 msg->max(bound_);
329#endif
330 tolerance_ = d.tolerance_;
331
332 parallel_union(msg);
333
334 // allocate and initialize storage
335 zero();
336
337 // copy the data from the local node
338 tim.enter("local");
339 for (typename blockmap_t::const_iterator i = d.blocks_.begin();
340 i != d.blocks_.end();
341 i++) {
342 int ndat = d.block_size(i->first);
343 typename blockmap_t::iterator loc = blocks_.find(i->first);
344 // must make sure block was found, since init_blocks may
345 // throw out blocks with a bound below the tolerance
346 if (loc != blocks_.end()) {
347 memcpy(loc->second, i->second, ndat*sizeof(double));
348 }
349 }
350 tim.exit("local");
351
352 // accumulate the data on all nodes
353 parallel_accumulate(msg);
354
355 tim.exit("repl_from_dist");
356}
357
358template <int N>
359void
360Array<N>::distributed_from_distributed(const sc::Ref<sc::MessageGrp> &msg,
361 const BlockDistrib<N> &distrib,
362 Array<N> &d,
363 bool clear_source_array,
364 bool ignore_block_distrib_throws)
365{
366 int me = msg->me();
367 int nproc = msg->n();
368
369 // If MPI is available, the MPI routines are used even on a single
370 // processor. This is done for testing purposes.
371#ifndef HAVE_MPI
372 if (nproc == 1) {
373 assign_all(d);
374 if (clear_source_array) d.clear();
375 return;
376 }
377 else {
378 throw std::runtime_error("Array<N,C>::distributed_from_distributed requires MPI");
379 }
380#else // HAVE_MPI
381 // clj debug
382 //d.print(msg,true,std::cout);
383 // clj end debug
384
385 // initialize the array
386 clear();
387 set_indices(d.indices());
388
389#if 1
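 // Alltoall-based redistribution: (1) count the blocks this node will send
 // to each destination, (2) exchange the counts with MPI_Alltoall,
 // (3) exchange the N block indices per block with custom_alltoallv,
 // (4) exchange the block data with a second custom_alltoallv, then copy
 // the received data into the freshly allocated blocks.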
390 // compute number of blocks and amount of data moving from this node to
391 // each other node
392 int *nblock_send = new int[nproc];
393 int *nblock_recv = new int[nproc];
394 for (int i=0; i<nproc; i++) nblock_send[i] = 0;
395 for (typename blockmap_t::const_iterator i = d.blocks_.begin();
396 i != d.blocks_.end();
397 i++) {
398 int node;
399 if (ignore_block_distrib_throws) {
400 try {
401 node = distrib.block_to_node(i->first);
402 }
403 catch (...) {
404 continue;
405 }
406 }
407 else {
408 node = distrib.block_to_node(i->first);
409 }
410 nblock_send[node]++;
411 }
412
413 // Exchange the size of buffers that will be sent from each node.
414 MPI_Alltoall(nblock_send, 1, MPI_INT,
415 nblock_recv, 1, MPI_INT,
416 MPI_COMM_WORLD);
417
418 // clj debug
419// for (int i=0; i<nproc; i++) {
420// std::cout << me << "nblock_send[" << i << "] = " << nblock_send[i]
421// << std::endl;
422// }
423// for (int i=0; i<nproc; i++) {
424// std::cout << me << "nblock_recv[" << i << "] = " << nblock_recv[i]
425// << std::endl;
426// }
427 // clj end debug
428
429 // Find out how much data we'll send and recv and what the offsets are.
430 int *block_send_offset = new int[nproc];
431 int *block_recv_offset = new int[nproc];
432 int *block_send_size = new int[nproc];
433 int *block_recv_size = new int[nproc];
434 for (int i=0; i<nproc; i++) {
435 block_send_size[i] = N*nblock_send[i];
436 block_recv_size[i] = N*nblock_recv[i];
437 if (i==0) {
438 block_send_offset[i] = 0;
439 block_recv_offset[i] = 0;
440 }
441 else {
442 block_send_offset[i] = block_send_size[i-1]
443 + block_send_offset[i-1];
444 block_recv_offset[i] = block_recv_size[i-1]
445 + block_recv_offset[i-1];
446 }
447 }
448 int block_send_size_total = block_send_size[nproc-1]
449 + block_send_offset[nproc-1];
450 int block_recv_size_total = block_recv_size[nproc-1]
451 + block_recv_offset[nproc-1];
452
453 // Allocate the blockinfo buffers and copy data into them. Each node's
454 // segment holds the N block indices of every block bound for that node:
455 // blockinfo[0], ..., blockinfo[nblock-1].
456 int *block_send_buffers = new int[block_send_size_total];
457 std::vector<int> current_send_offset(nproc);
458 std::fill(current_send_offset.begin(), current_send_offset.end(), 0);
459 for (typename blockmap_t::const_iterator i = d.blocks_.begin();
460 i != d.blocks_.end();
461 i++) {
462 int node;
463 if (ignore_block_distrib_throws) {
464 try {
465 node = distrib.block_to_node(i->first);
466 }
467 catch (...) {
468 continue;
469 }
470 }
471 else {
472 node = distrib.block_to_node(i->first);
473 }
474 for (int j=0; j<N; j++)
475 block_send_buffers[block_send_offset[node]
476 + current_send_offset[node]++]
477 = i->first.block(j);
478 }
479
480 // Exchange the blockinfo buffers.
481 int *block_recv_buffers = new int[block_recv_size_total];
482 custom_alltoallv(block_send_buffers, block_send_size,
483 block_send_offset, MPI_INT,
484 block_recv_buffers, block_recv_size,
485 block_recv_offset, MPI_INT,
486 MPI_COMM_WORLD);
487
488 // Add the blockinfos to this Array object and allocate the data.
489 int irecv = 0;
490 long ndata_recv_total = 0;
491 int *data_recv_offset = new int[nproc];
492 int *data_recv_size = new int[nproc];
493 for (int i=0; i<nproc; i++) {
494 int nblock = nblock_recv[i];
495 long ndata = 0;
496 for (int j=0; j<nblock; j++) {
497 BlockInfo<N> bi;
498 for (int k=0; k<N; k++) {
499 bi.block(k) = block_recv_buffers[irecv++];
500 }
501 ndata += block_size(bi);
502 add_unallocated_block(bi);
503 }
504 ndata_recv_total += ndata;
505 data_recv_size[i] = ndata;
506 if (i == 0)
507 data_recv_offset[i] = 0;
508 else
509 data_recv_offset[i] = data_recv_offset[i-1] + data_recv_size[i-1];
510 }
511
512 // Allocate the buffers for the exchange of data and place the data into
513 // the send buffer.
514 double *data_send_buffers = new double[d.n_element_allocated()];
515 int *data_send_offset = new int[nproc];
516 int *data_send_size = new int[nproc];
517 int isend = 0;
518 double *current_send_buffer = data_send_buffers;
519 for (int i=0; i<nproc; i++) {
520 int nblock = nblock_send[i];
521 data_send_size[i] = 0;
522 for (int j=0; j<nblock; j++) {
523 BlockInfo<N> bi;
524 for (int k=0; k<N; k++) {
525 bi.block(k) = block_send_buffers[isend++];
526 }
527 int ndata_in_block = block_size(bi);
528 memcpy(current_send_buffer, d.blocks_.find(bi)->second,
529 ndata_in_block*sizeof(double));
530 current_send_buffer += ndata_in_block;
531 data_send_size[i] += ndata_in_block;
532 }
533 if (i == 0)
534 data_send_offset[i] = 0;
535 else
536 data_send_offset[i] = data_send_offset[i-1] + data_send_size[i-1];
537 }
538 delete[] block_send_buffers;
539 delete[] block_send_offset;
540 delete[] block_send_size;
541 if (clear_source_array) d.clear();
542
543 // Exchange the data
544 double *data_recv_buffers = new double[ndata_recv_total];
545 custom_alltoallv(data_send_buffers, data_send_size,
546 data_send_offset, MPI_DOUBLE,
547 data_recv_buffers, data_recv_size,
548 data_recv_offset, MPI_DOUBLE,
549 MPI_COMM_WORLD);
550 delete[] data_send_buffers;
551 delete[] data_send_offset;
552 delete[] data_send_size;
553
554 // Update the array's data.
555 allocate_blocks();
556 irecv = 0;
557 double *current_recv_buffer = data_recv_buffers;
558 for (int i=0; i<nproc; i++) {
559 int nblock = nblock_recv[i];
560 for (int j=0; j<nblock; j++) {
561 BlockInfo<N> bi;
562 for (int k=0; k<N; k++) {
563 bi.block(k) = block_recv_buffers[irecv++];
564 }
565 typename blockmap_t::iterator iblock = blocks_.find(bi);
566 double *data = iblock->second;
567 int ndata_in_block = block_size(bi);
568 memcpy(data, current_recv_buffer, ndata_in_block*sizeof(double));
569 current_recv_buffer += ndata_in_block;
570 }
571 }
572
573 delete[] block_recv_buffers;
574 delete[] block_recv_offset;
575 delete[] block_recv_size;
576
577 delete[] data_recv_buffers;
578 delete[] data_recv_offset;
579 delete[] data_recv_size;
580
581 delete[] nblock_send;
582 delete[] nblock_recv;
583
584#else
585
587 // fetch my block map entries from all the other nodes
588 // and send d's contributions to all the other nodes
589
590 // compute number of blocks moving from this node to each other node
591 std::vector<int> nblock(nproc);
592 std::fill(nblock.begin(), nblock.end(), 0);
593 for (typename blockmap_t::const_iterator i = d.blocks_.begin();
594 i != d.blocks_.end();
595 i++) {
596 int node = distrib.block_to_node(i->first);
597 nblock[node]++;
598 }
599
600 // allocate buffers for each node
601 std::vector<int*> buffer(nproc);
602 for (int i=0; i<nproc; i++) {
603 if (i != me) {
604 buffer[i] = new int[1 + N*nblock[i]];
605 buffer[i][0] = nblock[i];
606 }
607 else {
608 buffer[i] = 0;
609 }
610 }
611
612 // pack the buffer for each node with blockinfo data
613 // local blocks are directly inserted
614 std::vector<int> ndata(nproc);
615 std::fill(ndata.begin(), ndata.end(), 1); // start at 1: element 0 of each buffer holds the block count
616 for (typename blockmap_t::const_iterator i = d.blocks_.begin();
617 i != d.blocks_.end();
618 i++) {
619 int node = distrib.block_to_node(i->first);
620 int &counter = ndata[node];
621 for (int j=0; j<N; j++) {
622 if (node == me) {
623 add_unallocated_block(i->first);
624 }
625 else {
626 buffer[node][counter++] = i->first.block(j);
627 }
628 }
629 }
630
631 // send the blockinfo data from this node to each other node
632 int max_nblock = *std::max_element(nblock.begin(), nblock.end());
633 msg->max(max_nblock);
634 int *remote_blocks = new int[1 + max_nblock*N];
635 int source = (me + nproc - 1)%nproc; // me - 1 mod nproc
636 for (int next_node = (me+1)%nproc;
637 next_node != me;
638 next_node = (next_node+1)%nproc) {
639#ifdef HAVE_MPI
640 MPI_Request send_req, recv_req;
641 MPI_Irecv(remote_blocks,1+max_nblock*N,MPI_INT,source,
642 100+source,MPI_COMM_WORLD,&recv_req);
643 MPI_Isend(buffer[next_node],1+nblock[next_node]*N,MPI_INT,next_node,
644 100+me,MPI_COMM_WORLD,&send_req);
645 MPI_Status status;
646 MPI_Wait(&recv_req,&status); // wait for the recv to complete
647 int iarray = 0;
648 int nblock = remote_blocks[iarray++];
649 for (int i=0; i<nblock; i++) {
650 BlockInfo<N> bi;
651 for (int j=0; j<N; j++) bi.block(j) = remote_blocks[iarray++];
652 add_unallocated_block(bi);
653 }
654 MPI_Wait(&send_req,&status); // wait for the send to complete
655 source = (source + nproc - 1)%nproc; // source - 1 mod nproc
656#else
657 throw std::runtime_error("Array<N,C>::distributed_from_distributed requires MPI");
658#endif
659 }
660 delete[] remote_blocks;
661 for (int i=0; i<nproc; i++) delete[] buffer[i];
662
664 // allocate storage
665 allocate_blocks();
666 // find the largest packet size and allocate storage
667 int maxsz = 0;
668#ifdef HAVE_MPI
669 for (typename blockmap_t::const_iterator i = d.blocks_.begin();
670 i != d.blocks_.end();
671 i++) {
672 int sz = block_size(i->first);
673 if (sz > maxsz) maxsz = sz;
674 }
675 msg->max(maxsz);
676 int maxalloc;
677 MPI_Pack_size(maxsz, MPI_DOUBLE, MPI_COMM_WORLD, &maxalloc);
678 for (int i=0; i<N; i++) {
679 int isz;
680 MPI_Pack_size(1, MPI_INT, MPI_COMM_WORLD, &isz);
681 maxalloc += isz;
682 }
683 char *outgoing_buffer = new char[maxalloc];
684#endif
685
687 // send/recv data
688 int n_to_be_received = blocks_.size() - nblock[me];
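 // Stream the remote blocks point to point: each block destined for another
 // node is MPI_Packed (N block indices followed by the data) and sent with
 // tag 99, while up to nrecv_req receives are kept posted and drained
 // between sends until all expected blocks have arrived.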
689#ifdef HAVE_MPI
690 const int nrecv_req = 20;
691 std::vector<char *> incoming_buffer(nrecv_req);
692 for (int i=0; i<nrecv_req; i++) {
693 incoming_buffer[i] = new char[maxalloc];
694 }
695 MPI_Request recv_req[nrecv_req];
696 for (int i=0; i<nrecv_req; i++) {
697 if (i<n_to_be_received) {
698 MPI_Irecv(incoming_buffer[i], maxalloc, MPI_BYTE, MPI_ANY_SOURCE,
699 99, MPI_COMM_WORLD, &recv_req[i]);
700 }
701 else {
702 recv_req[i] = MPI_REQUEST_NULL;
703 }
704 }
705#endif
706 for (typename blockmap_t::const_iterator i = d.blocks_.begin();
707 i != d.blocks_.end() || n_to_be_received > 0;) {
708#ifdef HAVE_MPI
709 MPI_Request send_req;
710#endif
711 int node;
712// std::cout << sc::scprintf("%d end = %d nleft = %d\n",
713// me, int(i == d.blocks_.end()), n_to_be_received)
714// << std::flush;
715 bool need_wait_for_send = false;
716 if (i != d.blocks_.end()) {
717 node = distrib.block_to_node(i->first);
718 int ndat = d.block_size(i->first);
719 if (node == me) {
720// std::cout << sc::scprintf(" %d processing local block\n",me)
721// << std::flush;
722 typename blockmap_t::iterator loc = blocks_.find(i->first);
723 memcpy(loc->second, i->second, ndat*sizeof(double));
724 }
725 else {
726// std::cout << sc::scprintf(" %d sending block to %d\n",me,node)
727// << std::flush;
728 // pack up block and send it off
729#ifdef HAVE_MPI
730 int outgoing_loc = 0;
731 for (int j=0; j<N; j++) {
732 int jval = i->first.block(j);
733 MPI_Pack(&jval, 1, MPI_INT, outgoing_buffer, maxalloc,
734 &outgoing_loc, MPI_COMM_WORLD);
735 }
736 MPI_Pack(i->second, ndat, MPI_DOUBLE, outgoing_buffer, maxalloc,
737 &outgoing_loc, MPI_COMM_WORLD);
738 MPI_Isend(outgoing_buffer,outgoing_loc,MPI_BYTE,node,
739 99,MPI_COMM_WORLD,&send_req);
740 need_wait_for_send = true;
741#else
742 throw std::runtime_error("LMP2:: cannot send remote block w/o MPI");
743#endif
744 }
745 i++;
746 }
747#ifdef HAVE_MPI
748 // See if any data is ready to be recved
749 int nrecvd = 0;
750 for (int irecv=0; irecv<nrecv_req; irecv++) {
751 if (recv_req[irecv] == MPI_REQUEST_NULL) continue;
752 int flag = 1;
753 MPI_Status status;
754 MPI_Test(&recv_req[irecv],&flag,&status);
755 if (flag) {
756// std::cout << sc::scprintf(" %d got incoming\n",me)
757// << std::flush;
758 BlockInfo<N> bi;
759 int position = 0;
760 for (int j=0; j<N; j++) {
761 int index;
762 MPI_Unpack(incoming_buffer[irecv], maxalloc, &position, &index,
763 1, MPI_INT, MPI_COMM_WORLD);
764 bi.block(j) = index;
765 }
766 typename blockmap_t::iterator loc = blocks_.find(bi);
767 int block_sz = block_size(bi);
768 MPI_Unpack(incoming_buffer[irecv], maxalloc, &position, loc->second,
769 block_sz, MPI_DOUBLE, MPI_COMM_WORLD);
770 n_to_be_received--;
771 // only repost the receive if there are not enough recv requests remaining
772 if (n_to_be_received >= nrecv_req) {
773 MPI_Irecv(incoming_buffer[irecv], maxalloc, MPI_BYTE, MPI_ANY_SOURCE,
774 99, MPI_COMM_WORLD, &recv_req[irecv]);
775 }
776 }
777 }
778
779 // Wait until data is sent
780 if (need_wait_for_send) {
781 MPI_Status status;
782 MPI_Wait(&send_req,&status);
783 }
784#endif
785 }
786#ifdef HAVE_MPI
787 for (int i=0; i<nrecv_req; i++) {
788 delete[] incoming_buffer[i];
789 }
790 delete[] outgoing_buffer;
791#endif
792 if (clear_source_array) d.clear();
793#endif
794#endif // HAVE_MPI
795
796 compute_bounds();
797
798 // clj debug
799 //print(msg,true,std::cout);
800 // clj end debug
801}
802
803}
804
805}
806
807#endif
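
The routines above are member templates of sc::sma2::Array and are normally driven from the LMP2 code itself. As a rough illustration only, the sketch below shows one plausible way to redistribute a two-index array round-robin over its first block index; it is not taken from the library. The include paths, the availability of a default-constructed Array<2>, sc::MessageGrp::get_default_messagegrp(), and the CompleteBlockDistrib constructor signature (as reconstructed in the listing above) are assumptions.

#include <util/group/message.h>       // sc::MessageGrp (assumed path)
#include <chemistry/qc/lmp2/sma.h>    // sc::sma2::Array (assumed path)
#include <chemistry/qc/lmp2/parallel.h>

// Redistribute src so that a block is owned by node (first block index % nproc).
void redistribute_over_first_index(sc::sma2::Array<2> &src,
                                   sc::sma2::Array<2> &dest)
{
  sc::Ref<sc::MessageGrp> msg = sc::MessageGrp::get_default_messagegrp();
  // The owner of a block is derived from block index 0 only (cyclic distribution).
  sc::sma2::CompleteBlockDistrib<2> distrib(src, msg, 0);
  // dest is cleared and given src's Ranges inside the call; the final
  // argument frees src's block data once it has been shipped out.
  dest.distributed_from_distributed(msg, distrib, src, true);
}
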
Referenced symbols:

sc::ExEnv::out0(): static std::ostream &out0(). Returns an ostream that writes from node 0.
sc::RefCount: The base class for all reference-counted objects. Definition ref.h:192
sc::Ref: A template class that maintains reference counts. Definition ref.h:361
sc::Timer: Uses RegionTimer to time intervals in an exception-safe manner. Definition regtime.h:181
sc::Timer::enter(const char *region): Begin timing, using the given timing region name.
sc::Timer::exit(const char *region=0): Exit the current timing region.
sc::sma2::Array: Implements a block sparse tensor. Definition sma.h:1247
sc::sma2::Array::block_size(const BlockInfo<N> &bi) const: Returns the size of the given block. Definition sma.h:1507
sc::sma2::Array::clear(): Make this array store nothing. Definition sma.h:1475
sc::sma2::Array::distributed_from_distributed(const sc::Ref<sc::MessageGrp> &, const BlockDistrib<N> &, Array<N> &, bool clear_source_array=false, bool ignore_block_distrib_throws=false): Redistribute an array. Definition parallel.h:360
sc::sma2::Array::index(int i) const: Gets the i'th Range. Definition sma.h:1503
sc::sma2::Array::indices() const: Gets the Range array. Definition sma.h:1505
sc::sma2::Array::n_element_allocated() const: Returns the number of elements contained in blocks. Definition sma.h:1702
sc::sma2::Array::parallel_accumulate(const sc::Ref<sc::MessageGrp> &grp): Performs a reduce-broadcast on the data. Definition parallel.h:51
sc::sma2::Array::parallel_union(const sc::Ref<sc::MessageGrp> &grp): Makes the block distribution the same on all processes. Definition parallel.h:75
sc::sma2::Array::replicated_from_distributed(const sc::Ref<sc::MessageGrp> &, const Array<N> &): Convert a distributed array to a replicated array. Definition parallel.h:310
sc::sma2::BlockDistrib: Provides information about how blocks are distributed onto processes. Definition sma.h:1205
sc::sma2::BlockDistrib::block_to_node(const BlockInfo<N> &b) const = 0: Given a block, returns the node on which it resides.
sc::sma2::BlockInfo: Stores info about a block of data. Definition sma.h:200
sc::sma2::CompleteBlockDistrib: Distribute blocks round-robin among processes using one or more index values. Definition parallel.h:249
sc::sma2::CompleteBlockDistrib::block_to_node(const BlockInfo<N> &b) const: Given a block, returns the node on which it resides. Definition parallel.h:287
sc::sma2::PairBlockDistrib: An implementation of BlockDistrib using PairMapping. Definition parallel.h:213
sc::sma2::PairBlockDistrib::block_to_node(const BlockInfo<N> &b) const: Given a block, returns the node on which it resides. Definition parallel.h:237
sc::sma2::PairMapping: Distributes pairs of indices among the processes. Definition parallel.h:139
sc::sma2::Range::nindex() const: Returns the dimension of this range. Definition sma.h:125
sc (namespace): Contains all MPQC code up to version 3. Definition mpqcin.h:14