29#ifndef _chemistry_qc_lcao_fockbuild_h
30#define _chemistry_qc_lcao_fockbuild_h
32#include <mpqc_config.h>
33#include <util/misc/regtime.h>
34#include <util/group/thread.h>
35#include <util/group/message.h>
36#include <chemistry/qc/basis/integral.h>
38#include <util/group/actmsg.h>
39#include <chemistry/qc/lcao/fockdist.h>
61 inline int block_offset(
int I,
int J)
const {
63 return I * blocks2_->nblock() + J;
67 std::cout <<
"shell_block_offset: noncanonical indices: "
68 << I <<
", " << J << std::endl;
71 return (I*(I+1))/2 + J;
74 inline int n_block()
const {
76 return blocks1_->nblock() * blocks2_->nblock();
79 return (blocks1_->nblock()*(blocks1_->nblock()+1))/2;
82 inline int block_owning_proc(
int I,
int J)
const {
83 return block_offset(I,J)%nproc_;
85 inline bool block_is_owner(
int I,
int J)
const {
86 return block_owning_proc(I,J) == me_;
89 inline int shell_block_offset(
int I,
int J)
const {
95 std::cout <<
"shell_block_offset: noncanonical indices: "
96 << I <<
", " << J << std::endl;
99 return (I*(I+1))/2 + J;
102 inline int n_shell_block()
const {
107 return (nI_*(nI_+1))/2;
110 inline int shell_block_owning_proc(
int I,
int J)
const {
111 return block_owning_proc(blocks1_->shell_to_block(I),
112 blocks2_->shell_to_block(J));
114 inline bool shell_block_is_owner(
int I,
int J)
const {
115 return shell_block_owning_proc(I,J) == me_;
118 virtual bool symmetric()
const = 0;
121 virtual void fix_diagonal_blocks()
const = 0;
122 virtual void clear() = 0;
131 virtual void data_to_scmat()
const = 0;
132 virtual void prefetch_block(
int I,
int J,
int ifetch,
int nfetch) = 0;
133 virtual void finish_prefetch_block() = 0;
134 virtual double *shell_block(
int Ish,
int Jsh)
const = 0;
135 virtual double *block(
int Ish,
int Jsh)
const = 0;
137 bool copy_data =
false)
const = 0;
158 double **blockpointers_;
168 void data_to_symmat()
const;
169 void data_to_rectmat()
const;
173 bool copy_data =
false);
175 bool symmetric()
const;
176 void fix_diagonal_blocks()
const;
183 void data_to_scmat()
const;
184 void prefetch_block(
int I,
int J,
int ifetch,
int nfetch);
185 void finish_prefetch_block();
186 double *shell_block(
int Ish,
int Jsh)
const;
187 double *block(
int Ish,
int Jsh)
const;
214 int matrix_, I_, J_, nIJ_;
216 int use_shell_blocks_;
223 void set_info(
int I,
int J,
int nIJ,
double *data,
224 bool use_shell_blocks) {
229 use_shell_blocks_ = use_shell_blocks;
238 bool use_shell_blocks_;
246 bool use_shell_blocks,
249 void process_write(
int node,
int I,
int J,
int nIJ,
double *data);
250 void process_read(
int node,
int I,
int J,
int nIJ,
double *data);
251 void begin_prefetch(
int node,
int I,
int J,
int nIJ,
int ifetch,
int nfetch,
/// Accessor: returns the current value of the need_read_ flag.
/// Does not modify the object (const member).
254 bool need_read()
const {
return need_read_; }
/// Accessor: returns the current value of the need_write_ flag.
/// Does not modify the object (const member).
255 bool need_write()
const {
return need_write_; }
258 return fbamg_->messagegrp();
261 return fbamg_->return_messagegrp();
266 double **blockpointers_;
276 bool prefetch_blocks_;
/// Copy constructor: takes over the packed key ij_ of IJ unchanged.
283 IJ_t(
const IJ_t&IJ) { ij_ = IJ.ij_; }
/// Packs the index pair (i, j) into the single 64-bit key ij_:
/// i occupies the upper 32 bits and j the lower 32 bits, matching the
/// i() and j() accessors.
///
/// Both operands are narrowed to uint32_t before being widened. Without
/// that, a negative j would be sign-extended to 64 bits by the implicit
/// int -> uint64_t conversion and its high bits would overwrite i.
/// For non-negative i and j the resulting key is unchanged.
284 IJ_t(
int i,
int j) { ij_ = (uint64_t(uint32_t(i)) << 32) | uint64_t(uint32_t(j)); }
/// Returns the first index of the pair: the packed key shifted right by
/// 32 bits (the half written from `i` by the two-int constructor),
/// implicitly converted to int.
285 int i()
const {
return ij_>>32; }
/// Returns the second index of the pair: the low 32 bits of the packed
/// key (masked with 0xffffffff), implicitly converted to int.
286 int j()
const {
return ij_&0xffffffff; }
/// Less-than on the packed keys (ij_ < IJ.ij_).
/// This is the ordering used by the std::map<IJ_t,...> members that are
/// keyed on IJ_t with the default comparator.
287 bool operator < (
const IJ_t&IJ)
const {
return ij_ < IJ.ij_; }
/// Greater-than on the packed keys (ij_ > IJ.ij_).
288 bool operator > (
const IJ_t&IJ)
const {
return ij_ > IJ.ij_; }
/// Greater-than-or-equal on the packed keys (ij_ >= IJ.ij_).
289 bool operator >= (
const IJ_t&IJ)
const {
return ij_ >= IJ.ij_; }
/// Less-than-or-equal on the packed keys (ij_ <= IJ.ij_).
290 bool operator <= (
const IJ_t&IJ)
const {
return ij_ <= IJ.ij_; }
/// Equality on the packed keys: true when both objects hold the same ij_.
291 bool operator == (
const IJ_t&IJ)
const {
return ij_ == IJ.ij_; }
/// Copy assignment: takes over the packed key ij_ of IJ.
/// Returns a reference to *this (instead of void) so that the operator
/// follows the conventional form and assignments can be chained; callers
/// that ignore the result are unaffected.
292 IJ_t& operator = (
const IJ_t&IJ) { ij_ = IJ.ij_; return *this; }
294 typedef std::pair<double*,MessageGrp::MessageHandle> FetchData_t;
296 mutable std::map<IJ_t,FetchData_t> prefetched_block_cache_;
297 mutable std::map<IJ_t,double*> block_cache_;
298 mutable std::map<IJ_t,double*> shell_block_cache_;
300 std::map<IJ_t,double*> local_blocks_;
301 std::map<IJ_t,double*> local_shell_blocks_;
305 void data_to_symmat()
const;
306 void data_to_rectmat()
const;
308 double *fetch_block(
int I,
int J)
const;
309 int block_size(
int iblock,
int jblock)
const;
310 void insert_shell_block_pointers(
int iblock,
int jblock,
312 std::map<IJ_t,double*> &)
const;
313 void blockpointers_to_local_blocks();
314 void local_blocks_to_blockpointers()
const;
320 bool copy_data =
false);
322 bool symmetric()
const;
323 void fix_diagonal_blocks()
const;
330 void data_to_scmat()
const;
331 void prefetch_block(
int I,
int J,
int ifetch,
int nfetch);
332 void finish_prefetch_block();
333 double *shell_block(
int Ish,
int Jsh)
const;
334 double *block(
int Ish,
int Jsh)
const;
355 int I,
int J,
int K,
int L,
356 int nI,
int nJ,
int nK,
int nL,
357 const double * RESTRICT buf) = 0;
363 int I,
int J,
int K,
int L,
364 int nI,
int nJ,
int nK,
int nL,
365 const double * RESTRICT buf) = 0;
366 virtual void contrib_p12_p13p24_J(
double factor,
367 int I,
int J,
int K,
int L,
368 int nI,
int nJ,
int nK,
int nL,
369 const double * RESTRICT buf) = 0;
370 virtual void contrib_p12_p13p24_K(
double factor,
371 int I,
int J,
int K,
int L,
372 int nI,
int nJ,
int nK,
int nL,
373 const double * RESTRICT buf) = 0;
374 virtual void contrib_p34_p13p24_J(
double factor,
375 int I,
int J,
int K,
int L,
376 int nI,
int nJ,
int nK,
int nL,
377 const double * RESTRICT buf) = 0;
378 virtual void contrib_p34_p13p24_K(
double factor,
379 int I,
int J,
int K,
int L,
380 int nI,
int nJ,
int nK,
int nL,
381 const double * RESTRICT buf) = 0;
382 virtual void contrib_p12_p34_J(
double factor,
383 int I,
int J,
int K,
int L,
384 int nI,
int nJ,
int nK,
int nL,
385 const double * RESTRICT buf) = 0;
386 virtual void contrib_p12_p34_K(
double factor,
387 int I,
int J,
int K,
int L,
388 int nI,
int nJ,
int nK,
int nL,
389 const double * RESTRICT buf) = 0;
390 virtual void contrib_p34_J(
double factor,
391 int I,
int J,
int K,
int L,
392 int nI,
int nJ,
int nK,
int nL,
393 const double * RESTRICT buf) = 0;
394 virtual void contrib_p34_K(
double factor,
395 int I,
int J,
int K,
int L,
396 int nI,
int nJ,
int nK,
int nL,
397 const double * RESTRICT buf) = 0;
398 virtual void contrib_p13p24_J(
double factor,
399 int I,
int J,
int K,
int L,
400 int nI,
int nJ,
int nK,
int nL,
401 const double * RESTRICT buf) = 0;
402 virtual void contrib_p13p24_K(
double factor,
403 int I,
int J,
int K,
int L,
404 int nI,
int nJ,
int nK,
int nL,
405 const double * RESTRICT buf) = 0;
406 virtual void contrib_all_J(
double factor,
407 int I,
int J,
int K,
int L,
408 int nI,
int nJ,
int nK,
int nL,
409 const double * RESTRICT buf) = 0;
410 virtual void contrib_all_K(
double factor,
411 int I,
int J,
int K,
int L,
412 int nI,
int nJ,
int nK,
int nL,
413 const double * RESTRICT buf) = 0;
416 virtual void set_fmat(
int i,
const RefSCMatrix &) = 0;
419 virtual void set_jmat(
int i,
const RefSCMatrix &) = 0;
422 virtual void set_kmat(
int i,
const RefSCMatrix &) = 0;
427 virtual double *jmat_shell_block(
int i,
int Ish,
int Jsh) = 0;
428 virtual double *kmat_shell_block(
int i,
int Ish,
int Jsh) = 0;
429 virtual const double *pmat_shell_block(
int i,
int Ish,
int Jsh) = 0;
431 virtual double *jmat_block(
int i,
int Ish,
int Jsh) = 0;
432 virtual double *kmat_block(
int i,
int Ish,
int Jsh) = 0;
433 virtual const double *pmat_block(
int i,
int Ish,
int Jsh) = 0;
/// Read-only accessor: returns a copy of nint_.
453 double nint()
const {
return nint_; }
/// Mutable accessor: returns a reference to nint_, so the caller can
/// update the stored value in place (e.g. `obj.nint() += x`).
454 double &nint() {
return nint_; }
456 virtual void activate() = 0;
457 virtual void sync() = 0;
458 virtual void deactivate() = 0;
460 virtual void flush() = 0;
462 virtual void prefetch_blocks(
int I,
int J,
int ifetch,
int nfetch) = 0;
463 virtual void finish_prefetch_blocks() = 0;
465 virtual void set_fockblocks(
const Ref<FockBlocks> &blocks_f1,
466 const Ref<FockBlocks> &blocks_f2,
467 const Ref<FockBlocks> &blocks_p) = 0;
469 virtual Ref<ThreadLock> &get_lock(
int i,
int I,
int J) = 0;
494 std::vector<Ref<FockBuildMatrix> >
jmats_;
495 std::vector<Ref<FockBuildMatrix> > kmats_;
496 std::vector<bool> k_is_j_;
498 std::vector<Ref<FockBuildMatrix> >
pmats_;
500 bool f_b1_equiv_f_b2;
502 std::vector<Ref<ThreadLock> > locks_;
503 std::string fockbuildmatrixtype_;
504 bool use_shell_blocks_;
509 const std::string &type,
516 int i,
int I,
int J) {
517 return owner->jmat_shell_block(i,I,J);
524 int i,
int I,
int J) {
525 return owner->kmat_shell_block(i,I,J);
529 template <
class Locator>
533 int i_, I_, J_, nIJ_;
536 int i,
int I,
int J,
int nI,
int nJ) {
542 data_ = owner_->alloc_scratch(nIJ_);
547 if (owner_->use_shell_blocks()) {
552 ilock = owner_->jmat(i_)->blocks1()->shell_to_block(I_);
553 jlock = owner_->jmat(i_)->blocks2()->shell_to_block(J_);
556 owner_->get_lock(i_,ilock,jlock));
558 double *real_data = l(owner_,i_,I_,J_);
567 for (
int i=0; i<nIJ_; i++) {
571 real_data[i] += data_[i];
579 owner_->free_scratch(data_);
581 double *data() {
return data_; }
589 int i,
int I,
int J,
int nI,
int nJ) {
591 data_ = owner_->pmat_shell_block(i,I,J);
594 const double *data() {
return data_; }
601 const std::string &fockbuildmatrixtype);
604 signed char *pmax)
const;
607 double *jmat_shell_block(
int i,
int I,
int J) {
608 return jmats_[i]->shell_block(I,J);
/// Forwards to symmetric() on the i-th entry of jmats_.
/// @param i index into jmats_; it is used with operator[] and is not
///          range-checked here.
610 bool jmat_symmetric(
int i)
const {
return jmats_[i]->symmetric(); }
611 double *kmat_shell_block(
int i,
int I,
int J) {
612 return kmats_[i]->shell_block(I,J);
/// Forwards to symmetric() on the i-th entry of kmats_.
/// @param i index into kmats_; it is used with operator[] and is not
///          range-checked here.
614 bool kmat_symmetric(
int i)
const {
return kmats_[i]->symmetric(); }
615 const double *pmat_shell_block(
int i,
int I,
int J) {
616 return pmats_[i]->shell_block(I,J);
619 double *jmat_block(
int i,
int I,
int J) {
620 return jmats_[i]->block(I,J);
622 double *kmat_block(
int i,
int I,
int J) {
623 return kmats_[i]->block(I,J);
625 const double *pmat_block(
int i,
int I,
int J) {
626 return pmats_[i]->block(I,J);
629 Ref<ThreadLock> &get_lock(
int i,
int Ish,
int Jsh) {
630 int hash = (i+(Ish+1)*(Jsh+1))%nlocks_;
634 double *alloc_scratch(
int size) {
635 double *data =
new double[size];
636 memset(data,0,
sizeof(
double)*size);
640 void free_scratch(
double *data) {
644 void set_fmat(
int i,
const RefSCMatrix &);
645 void set_fmat(
int i,
const RefSymmSCMatrix &);
647 void set_jmat(
int i,
const RefSCMatrix &);
648 void set_jmat(
int i,
const RefSymmSCMatrix &);
650 void set_kmat(
int i,
const RefSCMatrix &);
651 void set_kmat(
int i,
const RefSymmSCMatrix &);
653 void set_pmat(
int i,
const RefSymmSCMatrix &);
668 void prefetch_blocks(
int I,
int J,
int ifetch,
int nfetch);
669 void finish_prefetch_blocks();
/// Returns a const reference to the i-th entry of pmats_.
/// @param i index into pmats_; it is used with operator[] and is not
///          range-checked here.
679 const Ref<FockBuildMatrix> &pmat(
int i) {
return pmats_[i]; }
/// Accessor: returns the current value of the use_shell_blocks_ flag.
681 bool use_shell_blocks()
const {
return use_shell_blocks_; }
697 const signed char *pmax_;
699 bool prefetch_blocks_;
/// Returns (i*(i+1))/2 + j, computed in int arithmetic.
/// NOTE(review): this looks like the offset of element (i,j) in a packed
/// lower triangle and presumably expects i >= j ("canonical" order);
/// gen_sym_offset swaps its arguments before calling this when i < j.
/// No check of the argument order is made here.
704 int can_sym_offset(
int i,
int j) {
return (i*(i+1))/2 + j; }
705 int gen_sym_offset(
int i,
int j) {
706 if (i>=j) {
return can_sym_offset(i,j); }
707 else {
return can_sym_offset(j,i); }
715 bool prefetch_blocks,
/// Setter: stores acc in accuracy_.
722 void set_accuracy(
double acc) { accuracy_ = acc; }
/// Setter: stores the compute_J argument in the compute_J_ flag.
723 void set_compute_J(
bool compute_J) { compute_J_ = compute_J; }
/// Setter: stores the compute_K argument in the compute_K_ flag.
724 void set_compute_K(
bool compute_K) { compute_K_ = compute_K; }
/// Setter: stores the coef_K argument in coef_K_.
725 void set_coef_K(
double coef_K) { coef_K_ = coef_K; }
/// Setter: stores the pointer pmax in pmax_.
/// Only the pointer is copied, not the data it points to, so the caller's
/// buffer must stay valid for as long as pmax_ is used.
726 void set_pmax(
const signed char *pmax) { pmax_ = pmax; }
/// Accessor: returns timer_ as a Ref<RegionTimer>, by value.
727 const Ref<RegionTimer> get_timer()
const {
return timer_; }
739 int iblock,
int jblock,
int kblock,
int lblock);
746 bool prefetch_blocks,
783 bool prefetch_blocks,
816 bool prefetch_blocks_;
830 bool prefetch_blocks,
847 void init_threads(FBT_CTOR);
862 bool prefetch_blocks,
/// Setter: stores acc in accuracy_.
875 void set_accuracy(
double acc) { accuracy_ = acc; }
/// Setter: stores the compute_J argument in the compute_J_ flag
/// (read back by compute_J()).
877 void set_compute_J(
bool compute_J) { compute_J_ = compute_J; }
/// Setter: stores the compute_K argument in the compute_K_ flag
/// (read back by compute_K()).
878 void set_compute_K(
bool compute_K) { compute_K_ = compute_K; }
/// Setter: stores the coef_K argument in coef_K_ (read back by coef_K()).
879 void set_coef_K(
double coef_K) { coef_K_ = coef_K; }
/// Accessor: returns the compute_J_ flag as set by set_compute_J().
880 bool compute_J()
const {
return compute_J_; }
/// Accessor: returns the compute_K_ flag as set by set_compute_K().
881 bool compute_K()
const {
return compute_K_; }
/// Accessor: returns coef_K_ as set by set_coef_K().
882 double coef_K()
const {
return coef_K_; }
ActiveMessageGrp provides an implementation of active messages that sends objects derived from ActiveMe...
Definition actmsg.h:87
Derivatives of ActiveMessage can be constructed in one process and executed in another by using Activ...
Definition actmsg.h:46
Definition fockbuild.h:265
void zero_data()
Zero out the data.
void flush()
Flush the buffer cache (if it exists).
void accum(const Ref< FockBuildMatrix > &fbm)
Accumulate fbm into this.
void accum_remote(const Ref< MessageGrp > &)
Accumulate remote contributions.
Definition fockbuild.h:197
Definition fockbuild.h:212
void save_data_state(StateOut &s)
Save the base classes (with save_data_state) and the members in the same order that the StateIn CTOR ...
Definition fockbuild.h:43
virtual void print() const
The default print member does nothing.
virtual void flush()
Flush the buffer cache (if it exists).
virtual void accum(const Ref< FockBuildMatrix > &fbm)=0
Accumulate fbm into this.
virtual void zero_data()=0
Zero out the data.
virtual void accum_remote(const Ref< MessageGrp > &)=0
Accumulate remote contributions.
Definition fockbuild.h:233
The FockBuildThread class is used to actually build the Fock matrix.
Definition fockbuild.h:733
FockBuildThread_F11_P11(const Ref< FockDistribution > &fockdist, const Ref< MessageGrp > &msg, int nthread, int threadnum, bool prefetch_blocks, const Ref< ThreadLock > &lock, const Ref< Integral > &integral, const Ref< PetiteList > &pl, const Ref< GaussianBasisSet > &basis1, const Ref< GaussianBasisSet > &basis2, const Ref< GaussianBasisSet > &basis3, const Ref< FockBlocks > &blocks1, const Ref< FockBlocks > &blocks2, const Ref< FockBlocks > &blocks3, bool compute_J, bool compute_K, double coef_K)
Each thread must be given a unique contribution, c.
void run()
This is called when the Thread is run from a ThreadGrp.
This is used to build the Fock matrix when none of the basis sets are equivalent.
Definition fockbuild.h:765
FockBuildThread_F12_P33(const Ref< FockDistribution > &fockdist, const Ref< MessageGrp > &msg, int nthread, int threadnum, bool prefetch_blocks, const Ref< ThreadLock > &lock, const Ref< Integral > &integral, const Ref< PetiteList > &pl, const Ref< GaussianBasisSet > &basis1, const Ref< GaussianBasisSet > &basis2, const Ref< GaussianBasisSet > &basis3, const Ref< FockBlocks > &blocks1, const Ref< FockBlocks > &blocks2, const Ref< FockBlocks > &blocks3, bool compute_J, bool compute_K, double coef_K)
Each thread must be given a unique contribution, c.
void run()
This is called when the Thread is run from a ThreadGrp.
The FockBuildThread class is used to actually build the Fock matrix.
Definition fockbuild.h:687
FockBuildThread(const Ref< FockDistribution > &fockdist, const Ref< MessageGrp > &msg, int nthread, int threadnum, bool prefetch_blocks, const Ref< ThreadLock > &lock, const Ref< Integral > &integral, bool compute_J, bool compute_K, double coef_K)
Each thread must be given a unique contribution, c.
The FockBuild class works with the FockBuildThread class to generate Fock matrices for both closed sh...
Definition fockbuild.h:805
void build()
Construct the Fock matrices.
FockBuild(const Ref< FockDistribution > &fockdist, const Ref< FockContribution > &contrib, bool prefetch_blocks, const Ref< GaussianBasisSet > &b_f1, const Ref< GaussianBasisSet > &b_f2=0, const Ref< GaussianBasisSet > &b_p=0, const Ref< MessageGrp > &msg=MessageGrp::get_default_messagegrp(), const Ref< ThreadGrp > &thr=ThreadGrp::get_default_threadgrp(), const Ref< Integral > &integral=Integral::get_default_integral())
Create a FockBuild object using b_f1 as the Fock matrix row dimension basis, b_f2 as the Fock matrix ...
Definition fockbuild.h:342
virtual void accum(const Ref< FockContribution > &)=0
Sum the Fock matrix contributions from different threads.
virtual void update()=0
Push the internal Fock matrix data back into the original object.
virtual void contrib_e_K(double factor, int I, int J, int K, int L, int nI, int nJ, int nK, int nL, const double *RESTRICT buf)=0
This routine does not permute any indices.
virtual void copy_matrices(int unique_id)=0
Copy matrices to allow multiple threads to coexist.
virtual void accum_remote(const Ref< MessageGrp > &)=0
Sum the Fock matrix contributions from different processors.
virtual void contrib_e_J(double factor, int I, int J, int K, int L, int nI, int nJ, int nK, int nL, const double *RESTRICT buf)=0
This routine does not permute any indices.
virtual signed char * compute_pmax() const =0
Compute the maximum of the density in each block.
Definition fockbuild.h:530
Definition fockbuild.h:513
Definition fockbuild.h:521
Definition fockbuild.h:584
The GenericFockContribution class provides much of the infrastructure needed by FockContribution spec...
Definition fockbuild.h:491
void accum(const Ref< FockContribution > &)
Sum the Fock matrix contributions from different threads.
signed char * compute_pmax() const
Compute the maximum of the density in each block.
void update()
Push the internal Fock matrix data back into the original object.
std::vector< Ref< FockBuildMatrix > > pmats_
the number of density matrices
Definition fockbuild.h:498
void copy_matrices(int unique_id)
Copy matrices to allow multiple threads to coexist.
std::vector< Ref< FockBuildMatrix > > jmats_
the number of Fock matrices
Definition fockbuild.h:494
void accum_remote(const Ref< MessageGrp > &)
Sum the Fock matrix contributions from different processors.
static Integral * get_default_integral()
Returns the default Integral factory.
static MessageGrp * get_default_messagegrp()
Returns the default message group.
The base class for all reference counted objects.
Definition ref.h:192
The RefSCMatrix class is a smart pointer to an SCMatrix specialization.
Definition matrix.h:135
The RefSymmSCMatrix class is a smart pointer to a SymmSCMatrix specialization.
Definition matrix.h:265
A template class that maintains reference counts.
Definition ref.h:361
Definition fockbuild.h:157
virtual void print() const
The default print member does nothing.
void accum_remote(const Ref< MessageGrp > &)
Accumulate remote contributions.
void zero_data()
Zero out the data.
void accum(const Ref< FockBuildMatrix > &fbm)
Accumulate fbm into this.
Restores fundamental and user-defined types from images created with StateOut.
Definition statein.h:79
Serializes fundamental and user-defined types.
Definition stateout.h:71
static ThreadGrp * get_default_threadgrp()
Returns the default ThreadGrp.
The Thread abstract class defines an interface which must be implemented by classes wishing to be run...
Definition thread.h:75
Contains all MPQC code up to version 3.
Definition mpqcin.h:14