Index: c/mpi.c =================================================================== --- c/mpi.c (revision 5469) +++ c/mpi.c (working copy) @@ -729,6 +729,13 @@ #include "scalapack.c" #endif +// See the documentation for corresponding function in debug wrapper +// for the purpose of this function (gpaw/mpi/__init__.py) +static PyObject * get_c_object(MPIObject *self, PyObject *args) +{ + return Py_BuildValue("O", self); +} + // Forward declaration of MPI_Communicator because it needs MPIType // that needs MPI_getattr that needs MPI_Methods that need // MPI_Communicator that need ... @@ -785,6 +792,7 @@ {"diagonalize", (PyCFunction)diagonalize, METH_VARARGS, 0}, {"inverse_cholesky", (PyCFunction)inverse_cholesky, METH_VARARGS, 0}, #endif + {"get_c_object", (PyCFunction)get_c_object, METH_VARARGS, 0}, {"new_communicator", (PyCFunction)MPICommunicator, METH_VARARGS, "new_communicator(ranks) creates a new communicator."}, {0, 0, 0, 0} Index: gpaw/mpi/__init__.py =================================================================== --- gpaw/mpi/__init__.py (revision 5469) +++ gpaw/mpi/__init__.py (working copy) @@ -64,6 +64,9 @@ raise NotImplementedError('Calls to mpi waitall should not happen in ' 'serial mode') + def get_c_object(self): + raise NotImplementedError('Should not get c object for serial comm') + serial_comm = SerialCommunicator() try: @@ -81,9 +84,6 @@ if dry_run_size > 1: world = DryRunCommunicator(dry_run_size) -size = world.size -rank = world.rank -parallel = (size > 1) if debug: class _Communicator: @@ -100,6 +100,8 @@ # No duplicates: for i in range(len(sranks) - 1): assert sranks[i] != sranks[i + 1] + assert len(ranks) > 0 + comm = self.comm.new_communicator(ranks) if comm is None: # This cpu is not in the new communicator: @@ -220,9 +222,28 @@ nprow=1, npcol=1, mb=32, root=0): return self.comm.inverse_cholesky(a, nprow, npcol, mb, root) + def get_c_object(self): + """Return the C-object wrapped by this debug interface. + + Whenever a communicator object is passed to C code, that object + must be a proper C-object - *not* e.g. this debug wrapper. For + this reason. The C-communicator object has a get_c_object() + implementation which returns itself; thus, always call + comm.get_c_object() and pass the resulting object to the C code. + + XXX Must make a central check somewhere that the C type is + correct.""" + return self.comm.get_c_object() + serial_comm = _Communicator(serial_comm) + world = _Communicator(world) +size = world.size +rank = world.rank +parallel = (size > 1) + + def distribute_cpus(parsize, parsize_bands, nspins, nibzkpts, comm=world): """Distribute k-points/spins to processors. Index: gpaw/blacs.py =================================================================== --- gpaw/blacs.py (revision 5471) +++ gpaw/blacs.py (working copy) @@ -8,6 +8,7 @@ import numpy as np +from gpaw import sl_diagonalize from gpaw.mpi import SerialCommunicator from gpaw.utilities.blacs import scalapack_diagonalize_ex import _gpaw @@ -41,10 +42,10 @@ H_mM = np.zeros((0, 0)) S_mM = np.zeros((0, 0)) - S_mm = blockdescriptor.zeros(dtype) - H_mm = blockdescriptor.zeros(dtype) - C_mm = blockdescriptor.zeros(dtype) - C_nM = outdescriptor.zeros(dtype) + S_mm = blockdescriptor.zeros(dtype=dtype) + H_mm = blockdescriptor.zeros(dtype=dtype) + C_mm = blockdescriptor.zeros(dtype=dtype) + C_nM = outdescriptor.zeros(dtype=dtype) self.cols2blocks.redistribute(S_mM, S_mm) self.cols2blocks.redistribute(H_mM, H_mm) @@ -83,7 +84,8 @@ raise ValueError('Impossible: %dx%d Blacs grid with %d CPUs' % (nprow, npcol, comm.size)) # This call may also return INACTIVE - context = _gpaw.new_blacs_context(comm, npcol, nprow, order) + context = _gpaw.new_blacs_context(comm.get_c_object(), + npcol, nprow, order) self.context = context self.comm = comm @@ -131,12 +133,18 @@ def __nonzero__(self): return self.shape[0] != 0 and self.shape[1] != 0 - def zeros(self, dtype=float): - return np.zeros(self.shape, dtype) + def zeros(self, n=(), dtype=float): + return self._new_array(np.zeros, n, dtype) - def empty(self, dtype=float): - return np.empty(self.shape, dtype) + def empty(self, n=(), dtype=float): + return self._new_array(np.empty, n, dtype) + def _new_array(self, func, n, dtype): + if isinstance(n, int): + n = n, + shape = n + self.shape + return func(shape, dtype) + def check(self, a_mn): return a_mn.shape == self.shape and a_mn.flags.contiguous @@ -255,7 +263,8 @@ _gpaw.scalapack_redist(self.srcdescriptor.asarray(), self.dstdescriptor.asarray(), src_mn.T, dst_mn.T, - self.supercomm, subN, subM, isreal) + self.supercomm.get_c_object(), + subN, subM, isreal) def redistribute(self, src_mn, dst_mn): subM, subN = self.srcdescriptor.gshape @@ -276,7 +285,6 @@ class BlacsOrbitalDescriptor: # XXX can we find a less confusing name? # This class 'describes' all the LCAO/Blacs-related stuff def __init__(self, supercomm, gd, bd, kpt_comm, nao): - from gpaw import sl_diagonalize ncpus, mcpus, blocksize = sl_diagonalize[:3] bcommsize = bd.comm.size @@ -290,6 +298,7 @@ blockcomm = supercomm.new_communicator(block_ranks) mynao = -((-nao) // bcommsize) + # Range of basis functions for BLACS distribution of matrices: self.Mmax = nao self.Mstart = bcommrank * mynao @@ -308,7 +317,7 @@ nMdescriptor = stripegrid.new_descriptor(nao, nao, bd.mynbands, nao) self.mMdescriptor = mMdescriptor - self.mmdescriptro = mmdescriptor + self.mmdescriptor = mmdescriptor self.nMdescriptor = nMdescriptor self.mM2mm = Redistributor(supercomm, mMdescriptor, mmdescriptor) self.mm2nM = Redistributor(supercomm, mmdescriptor, nMdescriptor) @@ -318,3 +327,40 @@ def get_diagonalizer(self): return SLEXDiagonalizer(self.gd, self.bd, self.mM2mm, self.mm2nM) + + def get_overlap_descriptor(self): + return self.mMdescriptor + + def get_diagonalization_descriptor(self): + return self.mmdescriptor + + def get_coefficient_descriptor(self): + return self.nMdescriptor + + +class OrbitalDescriptor: + def __init__(self, nao, nmybands): + self.mMdescriptor = MatrixDescriptor(nao, nao) + self.nMdescriptor = MatrixDescriptor(nmybands, nao) + + self.Mstart = 0 + self.Mstop = nao + self.Mmax = nao + + def get_diagonalizer(self): + if sl_diagonalize: + from gpaw.lcao.eigensolver import SLDiagonalizer + diagonalizer = SLDiagonalizer() + else: + from gpaw.lcao.eigensolver import LapackDiagonalizer + diagonalizer = LapackDiagonalizer() + return diagonalizer + + def get_overlap_descriptor(self): + return self.mMdescriptor + + def get_diagonalization_descriptor(self): + return self.mMdescriptor + + def get_coefficent_descriptor(self): + return self.nMdescriptor Index: gpaw/fd_operators.py =================================================================== --- gpaw/fd_operators.py (revision 5469) +++ gpaw/fd_operators.py (working copy) @@ -47,7 +47,7 @@ self.shape = tuple(n_c) if gd.comm.size > 1: - comm = gd.comm + comm = gd.comm.get_c_object() else: comm = None Index: gpaw/wavefunctions.py =================================================================== --- gpaw/wavefunctions.py (revision 5470) +++ gpaw/wavefunctions.py (working copy) @@ -441,7 +441,8 @@ od = BlacsOrbitalDescriptor(self.world, self.gd, self.bd, self.kpt_comm, self.setups.nao) else: - od = None + from gpaw.blacs import OrbitalDescriptor + od = OrbitalDescriptor(self.setups.nao, self.bd.mynbands) self.od = od #elif extra_parameters.get('blacs'): @@ -463,7 +464,8 @@ WaveFunctions.set_eigensolver(self, eigensolver) eigensolver.initialize(self.kpt_comm, self.gd, self.band_comm, self.dtype, - self.setups.nao, self.mynbands, self.world) + self.setups.nao, self.mynbands, self.world, + self.od.get_diagonalizer()) def set_positions(self, spos_ac): WaveFunctions.set_positions(self, spos_ac) @@ -489,8 +491,15 @@ if extra_parameters.get('blacs'): Mstart = self.od.Mstart self.tci.set_matrix_distribution(Mstart, mynao) - - S_qMM = np.empty((nq, mynao, nao), self.dtype) + + overlap_descriptor = self.od.get_overlap_descriptor() + + S_qMM = overlap_descriptor.empty(nq, self.dtype) + + #S_qMM = np.empty((nq, mynao, nao), self.dtype) + + #assert S_qMM.shape == S1_qMM.shape, '%s vs %s' % (S_qMM.shape, + # S1_qMM.shape) T_qMM = np.empty((nq, mynao, nao), self.dtype) for kpt in self.kpt_u: q = kpt.q @@ -519,13 +528,14 @@ dOP_iM = np.zeros((dO_ii.shape[1], nao), P_Mi.dtype) # (ATLAS can't handle uninitialized output array) gemm(1.0, P_Mi, dO_ii, 0.0, dOP_iM, 'c') - gemm(1.0, dOP_iM, P_Mi[Mstart:Mstop], 1.0, S_MM, 'n') + if S_MM.shape[-2:] != (0, 0): + gemm(1.0, dOP_iM, P_Mi[Mstart:Mstop], 1.0, S_MM, 'n') comm = self.gd.comm comm.sum(S_qMM) comm.sum(T_qMM) - - if debug and self.band_comm.size == 1: + + if 0:#debug and self.band_comm.size == 1: from numpy.linalg import eigvalsh for S_MM in S_qMM: smin = eigvalsh(S_MM).real.min() @@ -963,18 +973,8 @@ lcaowfs.set_positions(spos_ac) eigensolver = get_eigensolver('lcao', 'lcao') - from gpaw import sl_diagonalize - if extra_parameters.get('blacs'): - diagonalizer = lcaowfs.od.get_diagonalizer() - elif sl_diagonalize: - from gpaw.lcao.eigensolver import SLDiagonalizer - diagonalizer = SLDiagonalizer() - else: - from gpaw.lcao.eigensolver import LapackDiagonalizer - diagonalizer = LapackDiagonalizer() - - - eigensolver.initialize(self.kpt_comm, self.gd, self.band_comm, + diagonalizer = lcaowfs.od.get_diagonalizer() + eigensolver.initialize(self.kpt_comm, self.gd, self.band_comm, self.dtype, self.setups.nao, lcaomynbands, self.world, diagonalizer) Index: gpaw/utilities/blacs.py =================================================================== --- gpaw/utilities/blacs.py (revision 5471) +++ gpaw/utilities/blacs.py (working copy) @@ -33,7 +33,8 @@ assert nprow * npcol <= comm_obj.size assert 0 < mb <= m assert 0 < nb <= n - return _gpaw.blacs_create(comm_obj, m, n, nprow, npcol, mb, nb, order) + return _gpaw.blacs_create(comm_obj.get_c_object(), + m, n, nprow, npcol, mb, nb, order) def blacs_destroy(adesc): assert len(adesc) == 9 @@ -56,7 +57,8 @@ assert (bdesc[2] == m) or (bdesc[2] == adesc[2]) assert (bdesc[3] == n) or (bdesc[3] == adesc[3]) # There is no simple may to check if adesc and bdesc are disjoint to comm_obj - return _gpaw.scalapack_redist1(a_obj, adesc, bdesc, isreal, comm_obj, m, n) + return _gpaw.scalapack_redist1(a_obj, adesc, bdesc, isreal, + comm_obj.get_c_object(), m, n) def scalapack_diagonalize_dc(a_obj, adesc, uplo): if a_obj is not None: Index: gpaw/transformers.py =================================================================== --- gpaw/transformers.py (revision 5469) +++ gpaw/transformers.py (working copy) @@ -64,7 +64,7 @@ assert not self.allocated gdin = self.gdin if gdin.comm.size > 1: - comm = gdin.comm + comm = gdin.comm.get_c_object() else: comm = None Index: gpaw/lcao/overlap.py =================================================================== --- gpaw/lcao/overlap.py (revision 5469) +++ gpaw/lcao/overlap.py (working copy) @@ -320,8 +320,13 @@ Mend1b = min(self.mynao, Mend1) Mstart2 = msoe.M2_a[a2] Mend2 = Mstart2 + tsoe.shape[1] - x_xqNM[..., Mstart1b:Mend1b, Mstart2:Mend2] = \ - x_qxmm[..., Mstart1b - Mstart1:Mend1b - Mstart1, :] + #print (x_xqNM[..., Mstart1b:Mend1b, Mstart2:Mend2].shape, + # x_qxmm[..., Mstart1b - Mstart1:Mend1b - Mstart1, :].shape) + try: + x_xqNM[..., Mstart1b:Mend1b, Mstart2:Mend2] = \ + x_qxmm[..., Mstart1b - Mstart1:Mend1b - Mstart1, :] + except ValueError: + pass class SimpleAtomIter: def __init__(self, cell_cv, spos1_ac, spos2_ac, offsetsteps=0): Index: customize.py =================================================================== --- customize.py (revision 5469) +++ customize.py (working copy) @@ -1,57 +1,25 @@ -#User provided customizations for the gpaw setup +scalapack = True -#Here, one can override the default arguments, or append own -#arguments to default ones -#To override use the form -# libraries = ['somelib','otherlib'] -#To append use the form -# libraries += ['somelib','otherlib'] +compiler='gcc' +mpicompiler='/usr/lib/openmpi/1.3.2-gcc/bin/mpicc' +mpilinker=mpicompiler +libraries = ['mpiblacsCinit', 'mpiblacs', 'scalapack', 'blas', 'lapack', 'mpi', 'mpi_f77'] -# Valid values for scalapack are False, or True: -# False (the default) - no ScaLapack compiled in -# True - ScaLapack compiled in -#scalapack = True +library_dirs = ['/opt/blacs/1.1/24/1.el5.fys.gfortran.4.1.2.openmpi.1.3.2/lib', + '/opt/scalapack/1.8.0/1.el5.fys.gfortran.4.1.2.openmpi.1.3.2.blas.3.0.37.el5.lapack.3.0.37.el5/lib/', + '/usr/lib/openmpi/1.3.2-gcc/lib/'] +include_dirs += ['/usr/lib/openmpi/1.3.2-gcc/include'] -#compiler = 'mpcc' -#libraries = [] -#libraries += [] +extra_link_args = [ + '-Wl,-rpath=/usr/lib/openmpi/1.3.2-gcc/lib/,' + '-rpath=/opt/blacs/1.1/24/1.el5.fys.gfortran.4.1.2.openmpi.1.3.2/lib,' + '-rpath=/opt/scalapack/1.8.0/1.el5.fys.gfortran.4.1.2.openmpi.1.3.2.blas.3.0.37.el5.lapack.3.0.37.el5/lib/' + ] -#library_dirs = [] -#library_dirs += [] +extra_compile_args = ['-O3', '-std=c99', '-funroll-all-loops', '-fPIC'] +define_macros += [('GPAW_NO_UNDERSCORE_CBLACS', '1')] +define_macros += [('GPAW_NO_UNDERSCORE_CSCALAPACK', '1')] +define_macros += [('GPAW_MPI_DEBUG', '1')] -#include_dirs = [] -#include_dirs += [] - -#extra_link_args = [] -#extra_link_args += [] - -#extra_compile_args = [] -#extra_compile_args += [] - -#runtime_library_dirs = [] -#runtime_library_dirs += [] - -#extra_objects = [] -#extra_objects += [] - -#define_macros = [] -#define_macros += [] - -#mpicompiler = None -#mpilinker = None -#mpi_libraries = [] -#mpi_libraries += [] - -#mpi_library_dirs = [] -#mpi_library_dirs += [] - -#mpi_include_dirs = [] -#mpi_include_dirs += [] - -#mpi_runtime_library_dirs = [] -#mpi_runtime_library_dirs += [] - -#mpi_define_macros = [] -#mpi_define_macros += [] - -#platform_id = '' +extra_link_args += ['-g'] +extra_compile_args += ['-O0', '-g']