MPQC 3.0.0-alpha
Loading...
Searching...
No Matches
core.hpp
1#ifndef MPQC_ARRAY_CORE_HPP
2#define MPQC_ARRAY_CORE_HPP
3
4#include "mpqc/array/forward.hpp"
5
6#include "mpqc_config.h"
7#include "mpqc/mpi.hpp"
8#ifdef HAVE_MPI
9#include "mpqc/array/parallel.hpp"
10#ifdef HAVE_ARMCI
11extern "C" {
12#include <armci.h>
13}
14#endif
15#endif // HAVE_MPI
16
17#include "mpqc/range.hpp"
18#include <boost/foreach.hpp>
19#include "mpqc/utility/mutex.hpp"
20#include "mpqc/utility/exception.hpp"
21
22#include <boost/noncopyable.hpp>
23
24namespace mpqc {
25namespace detail {
26
29 };
30
31 template<typename T>
33 : ArrayBase, boost::noncopyable
34 {
35
/// Construct an in-core (single local buffer) array.
/// @param name    array identifier, forwarded to ArrayBase
/// @param extents per-dimension extents, forwarded to ArrayBase
/// Allocates one dense, value-initialized vector holding
/// this->size() elements (the product of the extents).
36 template<typename Extent>
37 array_impl(const std::string &name,
38 const std::vector<Extent> &extents)
39 : ArrayBase(name, extents)
40 {
41 data_.resize(this->size());
42 }
43
// In-core backend: data is process-local, so there is nothing to
// synchronize — deliberate no-op.
44 void sync() {}
45
46 protected:
47
48 void _put(const std::vector<range> &r, const void *buffer) {
49 apply(putv, r, this->dims_, &this->data_[0], (const T*)buffer);
50 }
51
52 void _get(const std::vector<range> &r, void *buffer) const {
53 apply(getv, r, this->dims_, &this->data_[0], (T*)buffer);
54 }
55
56 private:
57
58 std::vector<T> data_;
59
60 static size_t putv(size_t size, T *data, const T *buffer) {
61 std::copy(buffer, buffer+size, data);
62 return size;
63 }
64
65 static size_t getv(size_t size, const T *data, T *buffer) {
66 std::copy(data, data+size, buffer);
67 return size;
68 }
69
// Plan a strided transfer: translate the N-d slice `r` of an array of
// shape `dims` (stride starts at 1 and multiplies dims[i] in order, so
// the first index varies fastest) into merged (count, stride) levels,
// then dispatch to the level-driven apply() overload.
// `begin` accumulates the flat offset of the slice origin.
// A dimension that covers its full extent is folded into the current
// count; a new level is opened only where the slice is partial, which
// minimizes the number of leaf copies.
// NOTE(review): strides.size() (size_t) is passed to the int `level`
// parameter — an implicit narrowing, harmless for realistic ranks.
70 template<class F, typename data_ptr, typename buffer_ptr>
71 static size_t apply(F f,
72 const std::vector<range> &r,
73 const std::vector<size_t> &dims,
74 data_ptr data, buffer_ptr buffer) {
75 size_t begin = 0;
76 std::vector<size_t> counts;
77 std::vector<size_t> strides;
78 counts.push_back(1);
79 strides.push_back(1);
80 for (size_t i = 0, stride = 1; i < dims.size(); ++i) {
81 counts.back() *= r[i].size();
82 begin += *r[i].begin()*stride;
83 stride *= dims[i];
84 // std::cout << "counts = " << counts.back() << std::endl;
85 // std::cout << "strides = " << strides.back() << std::endl;
86 if (dims[i] != r[i].size()) {
87 counts.push_back(1);
88 strides.push_back(stride);
89 }
90 }
91 return apply(f, counts, strides, data+begin, buffer, strides.size());
92 }
93
// Execute a planned transfer: recurse over the (count, stride) levels,
// applying the leaf functor `f` to each contiguous run of counts[0]
// elements. The packed buffer always advances by the number of
// elements already transferred; the array side advances by the
// current level's stride. Returns the total elements transferred.
template<class F, typename data_ptr, typename buffer_ptr>
static size_t apply(F f,
                    const std::vector<size_t> &counts,
                    const std::vector<size_t> &strides,
                    data_ptr data, buffer_ptr buffer,
                    int level) {
    if (level == 0) return 0;
    // Innermost level: one contiguous run handled by the leaf functor.
    if (level == 1) return f(counts[0], data, buffer);
    const int inner = level - 1;
    size_t transferred = 0;
    for (size_t i = 0; i != counts[inner]; ++i) {
        transferred += apply(f, counts, strides,
                             data + i*strides[inner],
                             buffer + transferred, inner);
    }
    return transferred;
}
116
117
118 };
119
120#ifdef HAVE_ARMCI
121
122 template<typename T>
124 : ArrayBase, boost::noncopyable
125 {
126
/// Construct a distributed array: forwards name/shape to ArrayBase,
/// then partitions the last dimension into per-rank blocks and
/// allocates the ARMCI-backed storage via initialize().
/// @param name array identifier
/// @param dims per-dimension extents
/// @param comm communicator to distribute over (must be World — see
///             the MPQC_CHECK in initialize())
126 array_parallel_impl(const std::string &name,
127 const std::vector<size_t> &dims,
128 const MPI::Comm &comm)
129 : ArrayBase(name, dims)
130 {
131 initialize(ArrayBase::dims_, comm);
132 }
134
135 private:
136
// Partition the array over `comm` and allocate distributed storage.
// The last dimension is split into ceil(dims.back()/nproc) blocks,
// one tile per rank; all leading dimensions stay whole on every tile.
// NOTE(review): assumes dims is non-empty (dims.back()) — confirm
// ArrayBase guarantees this.
// NOTE(review): ARMCI_Init() runs once per array construction;
// presumably it is safe to call repeatedly — confirm.
137 void initialize(const std::vector<size_t> &dims, const MPI::Comm &comm) {
138
139 // dont have code to use ARMCI groups yet
140 MPQC_CHECK(comm == MPI::Comm::World());
141
// Elements per rank: product of the full leading dimensions times
// one block of the last dimension.
142 size_t size = 1;
143 std::vector<range> extents;
144 for (int i = 0; i < int(dims.size())-1; ++i) {
145 size *= dims[i];
146 extents.push_back(range(0, dims[i]));
147 }
// Ceil-divide the last dimension across the ranks.
148 size_t block = (dims.back() + comm.size() - 1)/comm.size();
149 size *= block;
150
151 check(ARMCI_Init(), "ARMCI_Init", comm);
152
// Collective allocation: after the call data[i] holds rank i's
// base pointer, so every rank knows every tile's remote address.
153 std::vector<void*> data(comm.size(), NULL);
154 check(ARMCI_Malloc(&data[0], size*sizeof(T)), "ARMCI_Malloc", comm);
155 data_ = data[comm.rank()];
156
// Build the tile table. std::min clamps the final block so the
// last rank's range never runs past dims.back() (it may be empty).
157 for (size_t i = 0; i < comm.size(); ++i) {
158 Tile tile;
159 tile.data = data[i];
160 tile.proc = i;
161 tile.local = (i == comm.rank());
162 size_t begin = std::min<size_t>(dims.back(), i*block);
163 size_t end = std::min<size_t>(dims.back(), begin+block);
164 tile.extents = extents;
165 tile.extents.push_back(range(begin, end));
166 tiles_.push_back(tile);
167 //printf("tile[%i].ptr=%p\n", i, tile.ptr);
168 }
169
170 }
171
// Release this rank's portion of the collective ARMCI allocation
// made in initialize().
// NOTE(review): ARMCI_Free is typically a collective call, and there
// is no matching ARMCI_Finalize in this class — presumably teardown
// is coordinated elsewhere; confirm destruction order across ranks.
172 ~array_parallel_impl() {
173 ARMCI_Free(this->data_);
174 }
175
// Global synchronization point via ARMCI: all processes rendezvous
// here before any of them proceeds.
176 void sync() {
177 ARMCI_Barrier();
178 }
179
// Accessor for the communicator this array is distributed over.
// NOTE(review): comm_ is not declared in this class body — presumably
// an ArrayBase member; confirm.
180 const MPI::Comm& comm() const {
181 return this->comm_;
182 }
183
184 protected:
185
186 void _put(const std::vector<range> &r, const void *buffer) {
187 apply<PUT>(this->tiles_, r, (void*)buffer);
188 }
189
190 void _get(const std::vector<range> &r, void *buffer) const {
191 apply<GET>(this->tiles_, r, buffer);
192 }
193
194 private:
195
196 enum OP { PUT, GET };
197
198 typedef array_tile<void*> Tile;
199 void *data_;
200 std::vector<Tile> tiles_;
201
202 static void check(int err, const std::string &func,
203 MPI::Comm comm = MPI::Comm::Self()) {
204 if (comm.any((err != 0))) {
205 throw std::runtime_error(func + " failed");
206 }
207 }
208
209 template<OP op>
210 static void apply(const std::vector<Tile> &tiles,
211 const std::vector<range> &r,
212 void *buffer) {
213
214 T *local = (T*)buffer;
215
216 BOOST_FOREACH (Tile t, tiles) {
217
218 std::vector<range> x = t.subset(r);
219 //std::cout << t.extents << " ^ " << r << " = " << x << std::endl;
220 if (x.empty()) continue;
221
222 size_t total = 1;
223 size_t N = x.size();
224 int remote_strides[N];
225 int local_strides[N];
226 int count[N];
227
228 for (size_t i = 0; i < N; ++i) {
229 total *= x[i].size();
230 count[i] = x[i].size();
231 local_strides[i] =
232 x[i].size()*((i > 0) ? local_strides[i-1] : 1);
233 remote_strides[i] =
234 t.extents[i].size()*((i > 0) ? remote_strides[i - 1] : 1);
235 local_strides[i] *= sizeof(T);
236 remote_strides[i] *= sizeof(T);
237 }
238 count[0] *= sizeof(T);
239
240 T *remote = (T*)t.data;
241 for (size_t i = 0, stride = 1; i < N; ++i) {
242 remote += (x[i].begin() - t.extents[i].begin())*stride;
243 stride *= t.extents[i].size();
244 }
245
246 // for (size_t i = 0; i < N; ++i) {
247 // printf("%i count=%i, remote_strides=%i, local_strides=%i, total=%lu\n",
248 // i, count[i], remote_strides[i], local_strides[i], total);
249 // }
250 // printf("tile.ptr=%p\n", t.ptr);
251
252 mutex::global::lock();
253 if (op == GET) {
254 int err;
255 err = ARMCI_GetS(remote, remote_strides,
256 local, local_strides,
257 count, N-1, t.proc);
258 check(err, "ARMCI_GetS failed");
259 }
260 if (op == PUT) {
261 int err;
262 err = ARMCI_PutS(local, local_strides,
263 remote, remote_strides,
264 count, N-1, t.proc);
265 check(err, "ARMCI_PutS failed");
266 }
267 mutex::global::unlock();
268
269 local += total;
270
271 }
272 }
273
274 };
275#endif // HAVE_ARMCI
276
277} // namespace detail
278} // namespace mpqc
279
280
281namespace mpqc {
// Presumably a tag/driver object selecting the in-core storage backend
// for Array — confirm against forward.hpp's array_core_driver.
// NOTE(review): 'static' at namespace scope in a header gives every
// translation unit its own copy of ARRAY_CORE; harmless for an empty
// tag type, but an 'extern' declaration (or C++17 'inline') would
// avoid the duplication.
282 static const detail::array_core_driver ARRAY_CORE;
283}
284
285
286#endif /* MPQC_ARRAY_CORE_HPP */
Contains new MPQC code since version 3.
Definition integralenginepool.hpp:37
MPI_Comm object wrapper/stub.
Definition comm.hpp:14
Definition forward.hpp:23
Definition core.hpp:27
Definition forward.hpp:18
Definition parallel.hpp:31
Definition range.hpp:25

Generated at Wed Sep 25 2024 02:45:30 for MPQC 3.0.0-alpha using the documentation package Doxygen 1.12.0.