[Trilinos-Users] Muelu issue with exported Tpetra CrsMatrix

C.S. Natarajan csnataraj at gmail.com
Thu Aug 14 12:32:40 MDT 2014


Sorry left out one more bit of info. This failure seems to be nprocs
dependent. For a 512^3 problem I am able to do the MG setup on 120 procs
but not on 140 procs.

C.S.N


On Thu, Aug 14, 2014 at 1:00 PM, C.S. Natarajan <csnataraj at gmail.com> wrote:

> Hi,
>       I am running into a strange issue when building the MG hierarchy
> with Muelu. This only happens for large problems. I am pasting a minimal
> example that can reproduce the error. The example assembles a Laplace
> operator on a structured grid with Q1 elements with Dirichlet boundary
> condition in the Z direction and Neumann BC's in the X and Y direction. In
> the example there are two global node maps. gmap and gmap2. gmap is used to
> build matrix A which has an unequal distribution of nodes in each
> processor. gmap2 is used to export matrix A from gmap to matrix B which
> contains approximately the same number of nodes in each processor. For a
> problem of size 512^3, while A builds fine, building the hierarchy with
> matrix B fails with a memory corruption error that ends with a
> MPI_Reduce_scatter error. I am using the public git version as of 08/11.
> The build is with intel C++ 14.0.0 and support for long long int is enabled
> (do i also need to enable ilp64 from mkl?)
>
> I would really appreciate any input on what I might be doing wrong or
> whether there is a bug in the Muelu library.
>
>
> TIA,
> C.S.N
>
> #include <sstream>
>
> //Epetra Includes
> #include "Epetra_BlockMap.h"
> #include "Epetra_Comm.h"
> #include "Epetra_MpiComm.h"
> #include "Epetra_VbrMatrix.h"
> #include "Epetra_SerialDenseMatrix.h"
> #include "Epetra_FEVector.h"
> #include "Epetra_FEVbrMatrix.h"
>
> // Intrepid includes
> #include "Intrepid_ArrayTools.hpp"
> #include "Intrepid_FunctionSpaceTools.hpp"
> #include "Intrepid_FieldContainer.hpp"
> #include "Intrepid_CellTools.hpp"
> #include "Intrepid_HGRAD_HEX_C1_FEM.hpp"
> #include "Intrepid_RealSpaceTools.hpp"
> #include "Intrepid_DefaultCubatureFactory.hpp"
> #include "Intrepid_Utils.hpp"
> #include <Intrepid_HGRAD_QUAD_C1_FEM.hpp>
>
> //Sacado Includes
> #include "Sacado.hpp"
>
> //Shards Includes
> #include "Shards_CellTopology.hpp"
>
> //Teuchos Includes
> #include "Teuchos_Array.hpp"
> #include "Teuchos_ArrayView.hpp"
> #include "Teuchos_ArrayViewDecl.hpp"
> #include "Teuchos_BLAS.hpp"
> #include "Teuchos_CommandLineProcessor.hpp"
> #include "Teuchos_DefaultMpiComm.hpp"
> #include "Teuchos_FancyOStream.hpp"
> #include "Teuchos_GlobalMPISession.hpp"
> #include "Teuchos_oblackholestream.hpp"
> #include "Teuchos_OrdinalTraits.hpp"
> #include "Teuchos_RCP.hpp"
> #include "Teuchos_ScalarTraits.hpp"
> #include "Teuchos_SerialDenseMatrix.hpp"
> #include "Teuchos_ArrayRCPDecl.hpp"
>
> //Lone survivor from Tpetra
> #include "Tpetra_DefaultPlatform.hpp"
> #include "Tpetra_Version.hpp"
> #include "Tpetra_BlockMap_decl.hpp"
> #include "Tpetra_BlockMap.hpp"
> #include "Tpetra_Vector.hpp"
> #include "Tpetra_VbrMatrix.hpp"
> #include "Tpetra_DistObject_decl.hpp"
> #include "Tpetra_BlockMultiVector.hpp"
> #include "Tpetra_BlockMultiVector_decl.hpp"
> #include "Tpetra_MultiVector.hpp"
>
> //Muelu includes
> #include "MueLu.hpp"
> #include "MueLu_FactoryManager.hpp"
> #include "MueLu_DirectSolver.hpp"
> #include "MueLu_Hierarchy.hpp"
> #include "MueLu_SaPFactory.hpp"
> #include "MueLu_GenericRFactory.hpp"
> #include "MueLu_RAPFactory.hpp"
> #include "MueLu_TentativePFactory.hpp"
> #include "MueLu_Ifpack2Smoother.hpp"
> #include "MueLu_SmootherFactory.hpp"
> #include "MueLu_RigidBodyModeFactory.hpp"
> #include <MueLu_UseDefaultTypes.hpp>
> #include <MueLu_MLParameterListInterpreter.hpp>
> #include "MueLu_Amesos2Smoother.hpp"
> #include <BelosConfigDefs.hpp>
> #include <BelosLinearProblem.hpp>
> #include <BelosBlockCGSolMgr.hpp>
> #include <BelosXpetraAdapter.hpp>
> #include <BelosMueLuAdapter.hpp>
> #include <MueLu_UseShortNames.hpp>
>
>
>
>
> using namespace Intrepid;
> typedef double                            ST;
> typedef Teuchos::ScalarTraits<ST>        SCT;
> typedef SCT::magnitudeType                MT;
> typedef long long int LLT;
> typedef Kokkos::DefaultNode::DefaultNodeType NO;
> typedef Tpetra::CrsMatrix<ST, LO, LLT, NO, LMO> matrix_type;
> typedef Tpetra::Map<LO, LLT, NO> map_type;
> using Teuchos::RCP;
> using Teuchos::rcp;
>
> int main(int argc, char *argv[])
> {
>
>         using Teuchos::Array;
>         Teuchos::oblackholestream nullStream;
>         Teuchos::GlobalMPISession mpiSession(&argc,&argv, NULL);
>         Epetra_MpiComm ecomm(MPI_COMM_WORLD);
>
>         Teuchos::RCP<const Teuchos::Comm<int> > comm =
> Tpetra::DefaultPlatform::getDefaultPlatform().getComm();
>         size_t myRank = comm->getRank();
>         size_t nProc = comm->getSize();
>         std::ostream &out = (myRank == 0)?std::cout:nullStream;
>         Teuchos::RCP<Teuchos::FancyOStream> fout =
> Teuchos::getFancyOStream(Teuchos::rcpFromRef (out));
>
>         Teuchos::CommandLineProcessor clp;
>
>         int GNEX = 64;
>         clp.setOption("NX", &GNEX, "Number of elements in X-direction.");
>
>         int GNEY = 64;
>         clp.setOption("NY", &GNEY, "Number of elements in Y-direction.");
>
>         int GNEZ = 64;
>         clp.setOption("NZ", &GNEZ, "Number of elements in Z-direction.");
>
>         long long grow = 128;
>         clp.setOption("grow", &grow, "Global row number");
>
>         std::string matrix("original");
>         clp.setOption("matrix", &matrix, "original = A, exported = b");
>
>         clp.recogniseAllOptions(true);
>         clp.throwExceptions(false);
>
>         Teuchos::CommandLineProcessor::EParseCommandLineReturn parseReturn
> = clp.parse(argc, argv);
>         if(parseReturn ==
> Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0;
>         if(parseReturn != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL)
> return 1;
>
>         const int numGElements =
> GNEX*GNEY*GNEZ;
>
>         const int GNNX = GNEX+1, GNNY = GNEY+1, GNNZ=
> GNEZ+1;
>         const int numGNodes    = GNNX*GNNY*GNNZ;
>         const int numGDof      =
> GNNX*GNNY*GNNZ;
>         out << "\t\t Number of global elements is : " << numGElements <<
> "\n";
>         out << "\t\t Number of global nodes is    : " << numGNodes << "\n";
>
>         Epetra_Map elemZMap(GNEZ, 0, ecomm);
>         long count = GNNZ/nProc;
>         long np; np = GNNZ - (nProc*count);
>         count += (myRank < np)?1:0;
>
>         long long int nnu =
> count*GNNX*GNNY;
>
>         LO indexBase = 0;
>         Teuchos::RCP<const Tpetra::Map<LO, LLT> > gmap = Teuchos::rcp(new
> Tpetra::Map<LO,LLT>(-1, nnu, indexBase, comm));
>         int numLNodes = gmap->getNodeNumElements();
>
>         int startOverlapZ = (myRank ==
> 0)?0:(gmap->getGlobalElement(0))-GNNX*GNNY;
>
>         int endOverlapZ = (myRank ==
> nProc-1)?(gmap->getGlobalElement(0)+(count-1)*GNNX*GNNY):(gmap->getGlobalElement(0)+count*GNNX*GNNY);
>
>
>         startOverlapZ /= (GNNX*GNNY); endOverlapZ /=
> (GNNX*GNNY);
>
>         int startX = 0; int endX = GNNX;
>
>         int startUY = (myRank == 0)?
> startOverlapZ:(startOverlapZ+1);
> // Unique map start index
>         int endUY   = (myRank ==
> nProc-1)?endOverlapZ:(endOverlapZ-1);                                   //
> Unique map end index
>
>         int numLElemZ = elemZMap.NumMyElements();
>         int numLElemX = GNEX, numLElemY = GNEY;
>         int numLElems = numLElemX*numLElemY*numLElemZ;
>         int startElemZ = elemZMap.GID(0);
>         int endElemZ = elemZMap.GID(numLElemZ-1);
>         int startNodeZ = startElemZ;
>         int endNodeZ = endElemZ+1;
>
>         typedef shards::CellTopology    CellTopology;
>         CellTopology h8(shards::getCellTopologyData<shards::Hexahedron<8>
> >() );
>         // Get dimensions
>         int numNodesPerElem = h8.getNodeCount();
>         int spaceDim = h8.getDimension();
>
>         FieldContainer<int> elemNode(numLElems, numNodesPerElem);
>         int lElem = 0;
>         for(int k = startElemZ; k <= endElemZ; k++)
>                 for (int j = 0; j < GNEY; j++)
>                         for (int i = 0; i < GNEX; i++)
>                         {
>                                 elemNode(lElem,0) = (GNNX*GNNY)*k +
> (GNNX)*j + i;
>                                 elemNode(lElem,1) = (GNNX*GNNY)*k +
> (GNNX)*j + i + 1;
>                                 elemNode(lElem,2) = (GNNX*GNNY)*k +
> (GNNX)*(j + 1) + i + 1;
>                                 elemNode(lElem,3) = (GNNX*GNNY)*k +
> (GNNX)*(j + 1) + i;
>                                 elemNode(lElem,4) = (GNNX*GNNY)*(k+1) +
> (GNNX)*j + i;
>                                 elemNode(lElem,5) = (GNNX*GNNY)*(k+1) +
> (GNNX)*j + i + 1;
>                                 elemNode(lElem,6) = (GNNX*GNNY)*(k+1) +
> (GNNX)*(j + 1) + i + 1;
>                                 elemNode(lElem,7) = (GNNX*GNNY)*(k+1) +
> (GNNX)*(j + 1) + i;
>                                 lElem++;
>                         }
>
>         double leftX = 0.0, rightX = 1.0;
>         double leftY = 0.0, rightY = 1.0;
>         double leftZ = 0.0, rightZ = 1.0;
>
>         hx = hy = hz = 1;
>         numLNodes = (numLElemZ+1)*(numLElemY+1)*(numLElemX+1);
>         FieldContainer<double> nodeCoord(numLNodes, spaceDim);
>         FieldContainer<int> nodeOnBoundary(numLNodes);
>
>         int inode = 0;
>
>         for (int k = startNodeZ; k <= endNodeZ; k++)
>                 for (int j = 0; j < GNNY; j++)
>                         for (int i = 0; i < GNNX; i++)
>                         {
>                                 nodeCoord(inode,0) = leftX + (double)i*hx;
>                                 nodeCoord(inode,1) = leftY + (double)j*hy;
>                                 nodeCoord(inode,2) = leftZ +
> (double)k*hz;
>                                 inode++;
>                         }
>
>         DefaultCubatureFactory<double>  cubFactory;
>         int cubDegree = 3;
>         Teuchos::RCP<Cubature<double> > hexCub = cubFactory.create(h8,
> cubDegree);
>
>         int cubDim       = hexCub->getDimension();
>         int numCubPoints = hexCub->getNumPoints();
>
>         FieldContainer<double> cubPoints(numCubPoints, cubDim);
>         FieldContainer<double> cubWeights(numCubPoints);
>
>         hexCub->getCubature(cubPoints, cubWeights);
>
>         Basis_HGRAD_HEX_C1_FEM<double, FieldContainer<double> >
> hexHGradBasis;
>         int numFieldsG = hexHGradBasis.getCardinality();
>
>         FieldContainer<double> hexGVals(numFieldsG, numCubPoints);
>         FieldContainer<double> hexGrads(numFieldsG, numCubPoints,
> spaceDim);
>
>
>         hexHGradBasis.getValues(hexGVals, cubPoints, OPERATOR_VALUE);
>         hexHGradBasis.getValues(hexGrads, cubPoints, OPERATOR_GRAD);
>
>
>         typedef CellTools<double>  CellTools;
>         typedef FunctionSpaceTools fst;
>         int numCells = 1;
>
>         FieldContainer<double> hexNodes(numCells, numNodesPerElem,
> spaceDim);
>
>         FieldContainer<double> hexJacobian(numCells, numCubPoints,
> spaceDim, spaceDim);
>         FieldContainer<double> hexJacobInv(numCells, numCubPoints,
> spaceDim, spaceDim);
>         FieldContainer<double> hexJacobDet(numCells, numCubPoints);
>
>         FieldContainer<double> localStiffMatrix(numCells, numFieldsG,
> numFieldsG);
>         FieldContainer<double> weightedMeasure(numCells, numCubPoints);
>         FieldContainer<double> hexGradsTransformed(numCells, numFieldsG,
> numCubPoints, spaceDim);
>         FieldContainer<double> hexGradsTransformedWeighted(numCells,
> numFieldsG, numCubPoints, spaceDim);
>
>         FieldContainer<double> rhsData(numCells, numCubPoints);
>         FieldContainer<double> localRHS(numCells, numFieldsG);
>         FieldContainer<double> hexGValsTransformed(numCells, numFieldsG,
> numCubPoints);
>         FieldContainer<double> hexGValsTransformedWeighted(numCells,
> numFieldsG, numCubPoints);
>         FieldContainer<double> physCubPoints(numCells, numCubPoints,
> cubDim);
>
>         Teuchos::Array<ST> values(8);
>         Teuchos::Array<long long int> cols(8);
>         Teuchos::RCP<matrix_type> A = rcp(new matrix_type(gmap, 27));
>
>         long long int rowIndex = 0, colIndex = 0, ierr, ix;
>         for(int i = 0; i < lElem; i++)
>         {
>                 for(int j = 0; j < numNodesPerElem; j++)
>                 {
>
>                         ix = (elemNode(i,j)%GNNX);
>                         hexNodes(0,j,0) = ix*hx;
>                         hexNodes(0,j,1) = ((elemNode(i,j)%(GNNX*GNNY) -
> ix)/GNNX)*hy;
>                         hexNodes(0,j,2) = (elemNode(i,j)/(GNNX*GNNY))*hz;
>                 }
>
>                 CellTools::setJacobian(hexJacobian, cubPoints, hexNodes,
> h8);
>                 CellTools::setJacobianInv(hexJacobInv, hexJacobian );
>                 CellTools::setJacobianDet(hexJacobDet, hexJacobian );
>
>                 fst::HGRADtransformGRAD<double>(hexGradsTransformed,
> hexJacobInv, hexGrads);
>
>                 fst::computeCellMeasure<double>(weightedMeasure,
> hexJacobDet, cubWeights);
>
>                 fst::multiplyMeasure<double>(hexGradsTransformedWeighted,
> weightedMeasure, hexGradsTransformed);
>
>                 fst::integrate<double>(localStiffMatrix,
> hexGradsTransformed, hexGradsTransformedWeighted, COMP_BLAS);
>                 for (int row = 0; row < numFieldsG; row++)
>                 {
>                         rowIndex = elemNode(i,row);
>                         for (int col = 0; col < numFieldsG; col++)
>                         {
>                                 cols[col] = elemNode(i,col);
>                                 values[col] = localStiffMatrix(0, row,
> col);
>                         }
>                         if ((hexNodes(0,row,2) == 0) || (hexNodes(0,row,2)
> == GNEZ))
>                                 std::fill(values.begin(), values.end(), 0);
>                         A->insertGlobalValues(rowIndex, cols, values);
>                 }
>
>         }
>
>         LLT drow = 0, maxind = 0, down = GNNX*GNNY, top =
> GNNX*GNNY*(GNNZ-1);
>         LO num_local_nodes;
>         maxind = gmap->getMaxGlobalIndex();
>
>         num_local_nodes = gmap->getNodeNumElements();
>         if ((drow < down) || (maxind >= top))
>         {
>                 for(int i = 0; i < num_local_nodes; i++)
>                 {
>                         drow = gmap->getGlobalElement(i);
>                         if ((drow < down) || (drow >= top))
>                                 A->replaceGlobalValues(drow,
> Teuchos::tuple<long long int>(drow), Teuchos::tuple<ST>(1.0));
>                 }
>         }
>
>         A->globalAssemble();
>         A->fillComplete();
>
>         RCP<Tpetra::Vector<SC, LO, LLT, NO> > x1 = rcp(new
> Tpetra::Vector<SC, LO, LLT, NO> (A->getDomainMap ()));
>         RCP<Tpetra::Vector<SC, LO, LLT, NO> > b1 = rcp(new
> Tpetra::Vector<SC, LO, LLT, NO> (A->getRangeMap ()));
>         x1->putScalar(1.0);
>         A->apply(*x1, *b1);
>         out << "Norm of b1 is " << b1->norm2() << std::endl;
>
>
>         Teuchos::RCP< map_type> gmap2 = rcp (new map_type (numGNodes,
> indexBase, comm, Tpetra::GloballyDistributed));
>         Teuchos::RCP<matrix_type> B = Teuchos::rcp(new matrix_type (gmap2,
> 0));
>         typedef Tpetra::Export<LO, LLT, NO> export_type;
>         export_type exporter (gmap, gmap2);
>         B->doExport (*A, exporter, Tpetra::INSERT);
>         B->fillComplete();
>
>         RCP<Tpetra::Vector<SC, LO, LLT, NO> > x2 = rcp(new
> Tpetra::Vector<SC, LO, LLT, NO> (B->getDomainMap ()));
>         RCP<Tpetra::Vector<SC, LO, LLT, NO> > b2 = rcp(new
> Tpetra::Vector<SC, LO, LLT, NO> (B->getRangeMap ()));
>         x2->putScalar(1.0);
>         B->apply(*x2, *b2);
>         out << "Norm of b2 is " << b2->norm2() << std::endl;
>
>         Teuchos::Array<ST> crsvals;
>         size_t NumEntries = 0;
>         Teuchos::Array<LLT> indices;
>         NumEntries = B->getNumEntriesInGlobalRow(grow);
>         if (NumEntries != Teuchos::OrdinalTraits<size_t>::invalid())
>         {
>                 std::cout << "outputting crs values " << std::endl;
>                 indices.resize(NumEntries); crsvals.resize(NumEntries);
>                 A->getGlobalRowCopy (grow, indices(), crsvals(),
> NumEntries);
>                 for(int i = 0; i < indices.size(); i++)
>                         std::cout << indices[i] << " " << crsvals[i] <<
> "\n";
>         }
>
>         Xpetra::UnderlyingLib lib = Xpetra::UseTpetra;
>         Teuchos::RCP<Xpetra::CrsMatrix<ST, LO, LLT, NO, LMO> > mueluM_;
>         if (matrix == "original")
>                 mueluM_ = rcp(new Xpetra::TpetraCrsMatrix<ST, LO, LLT, NO,
> LMO>(A));
>         else
>                 mueluM_ = rcp(new Xpetra::TpetraCrsMatrix<ST, LO, LLT, NO,
> LMO>(B));
>
>         Teuchos::RCP<Xpetra::Matrix <ST, LO, LLT, NO, LMO> > mueluA =
> rcp(new Xpetra::CrsMatrixWrap<ST, LO, LLT, NO, LMO>(mueluM_));
>
>         Teuchos::RCP<MueLu::Hierarchy<ST, LO, LLT, NO, LMO> > H = rcp(new
> MueLu::Hierarchy<ST, LO, LLT, NO, LMO> (mueluA));
>
>         out << "Begin setup of Heirarchy ..." << std::endl;
>
>         H->Setup();
>
>         out << "Finished setup of Hierarchy ..." << std::endl;
>
> }
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://software.sandia.gov/pipermail/trilinos-users/attachments/20140814/e70ba9bc/attachment-0001.html>


More information about the Trilinos-Users mailing list