[Trilinos-Users] [EXTERNAL] Muelu issue with exported Tpetra CrsMatrix

Andrey Prokopenko aprokop at sandia.gov
Thu Aug 14 14:25:45 MDT 2014


Hi Natarajan,

I'm going to take a look at it. Is 512^3 the smallest reproducible 
size? Have you tried running a smaller problem through valgrind?

Could you please also attach the output which happens before the 
corruption? That would help us to identify the responsible factory. It 
would also be great if you have the stack.

-Andrey

On Thu 14 Aug 2014 12:00:11 PM MDT, C.S. Natarajan wrote:
> Hi,
>       I am running into a strange issue when building the MG hierarchy
> with Muelu. This only happens for large problems. I am pasting a
> minimal example that can reproduce the error. The example assembles a
> Laplace operator on a structured grid with Q1 elements with Dirichlet
> boundary condition in the Z direction and Neumann BC's in the X and Y
> direction. In the example there are two global node maps. gmap and
> gmap2. gmap is used to build matrix A which has an unequal
> distribution of nodes in each processor. gmap2 is used to export
> matrix A from gmap to matrix B which contains approximately the same
> number of nodes in each processor. For a problem of size 512^3, while
> A builds fine, building the hierarchy with matrix B fails with a
> memory corruption error that ends with a MPI_Reduce_scatter error. I
> am using the public git version as of 08/11. The build is with intel
> C++ 14.0.0 and support for long long int is enabled (do i also need to
> enable ilp64 from mkl?)
>
> I would really appreciate any input on what I might be doing wrong or
> whether there is a bug in the Muelu library.
>
>
> TIA,
> C.S.N
>
> #include <sstream>
>
> //Epetra Includes
> #include "Epetra_BlockMap.h"
> #include "Epetra_Comm.h"
> #include "Epetra_MpiComm.h"
> #include "Epetra_VbrMatrix.h"
> #include "Epetra_SerialDenseMatrix.h"
> #include "Epetra_FEVector.h"
> #include "Epetra_FEVbrMatrix.h"
>
> // Intrepid includes
> #include "Intrepid_ArrayTools.hpp"
> #include "Intrepid_FunctionSpaceTools.hpp"
> #include "Intrepid_FieldContainer.hpp"
> #include "Intrepid_CellTools.hpp"
> #include "Intrepid_HGRAD_HEX_C1_FEM.hpp"
> #include "Intrepid_RealSpaceTools.hpp"
> #include "Intrepid_DefaultCubatureFactory.hpp"
> #include "Intrepid_Utils.hpp"
> #include <Intrepid_HGRAD_QUAD_C1_FEM.hpp>
>
> //Sacado Includes
> #include "Sacado.hpp"
>
> //Shards Includes
> #include "Shards_CellTopology.hpp"
>
> //Teuchos Includes
> #include "Teuchos_Array.hpp"
> #include "Teuchos_ArrayView.hpp"
> #include "Teuchos_ArrayViewDecl.hpp"
> #include "Teuchos_BLAS.hpp"
> #include "Teuchos_CommandLineProcessor.hpp"
> #include "Teuchos_DefaultMpiComm.hpp"
> #include "Teuchos_FancyOStream.hpp"
> #include "Teuchos_GlobalMPISession.hpp"
> #include "Teuchos_oblackholestream.hpp"
> #include "Teuchos_OrdinalTraits.hpp"
> #include "Teuchos_RCP.hpp"
> #include "Teuchos_ScalarTraits.hpp"
> #include "Teuchos_SerialDenseMatrix.hpp"
> #include "Teuchos_ArrayRCPDecl.hpp"
>
> //Lone survivor from Tpetra
> #include "Tpetra_DefaultPlatform.hpp"
> #include "Tpetra_Version.hpp"
> #include "Tpetra_BlockMap_decl.hpp"
> #include "Tpetra_BlockMap.hpp"
> #include "Tpetra_Vector.hpp"
> #include "Tpetra_VbrMatrix.hpp"
> #include "Tpetra_DistObject_decl.hpp"
> #include "Tpetra_BlockMultiVector.hpp"
> #include "Tpetra_BlockMultiVector_decl.hpp"
> #include "Tpetra_MultiVector.hpp"
>
> //Muelu includes
> #include "MueLu.hpp"
> #include "MueLu_FactoryManager.hpp"
> #include "MueLu_DirectSolver.hpp"
> #include "MueLu_Hierarchy.hpp"
> #include "MueLu_SaPFactory.hpp"
> #include "MueLu_GenericRFactory.hpp"
> #include "MueLu_RAPFactory.hpp"
> #include "MueLu_TentativePFactory.hpp"
> #include "MueLu_Ifpack2Smoother.hpp"
> #include "MueLu_SmootherFactory.hpp"
> #include "MueLu_RigidBodyModeFactory.hpp"
> #include <MueLu_UseDefaultTypes.hpp>
> #include <MueLu_MLParameterListInterpreter.hpp>
> #include "MueLu_Amesos2Smoother.hpp"
> #include <BelosConfigDefs.hpp>
> #include <BelosLinearProblem.hpp>
> #include <BelosBlockCGSolMgr.hpp>
> #include <BelosXpetraAdapter.hpp>
> #include <BelosMueLuAdapter.hpp>
> #include <MueLu_UseShortNames.hpp>
>
>
>
>
> using namespace Intrepid;
> typedef double                            ST;
> typedef Teuchos::ScalarTraits<ST>        SCT;
> typedef SCT::magnitudeType                MT;
> typedef long long int LLT;
> typedef Kokkos::DefaultNode::DefaultNodeType NO;
> typedef Tpetra::CrsMatrix<ST, LO, LLT, NO, LMO> matrix_type;
> typedef Tpetra::Map<LO, LLT, NO> map_type;
> using Teuchos::RCP;
> using Teuchos::rcp;
>
> int main(int argc, char *argv[])
> {
>
>         using Teuchos::Array;
>         Teuchos::oblackholestream nullStream;
>         Teuchos::GlobalMPISession mpiSession(&argc,&argv, NULL);
>         Epetra_MpiComm ecomm(MPI_COMM_WORLD);
>
>         Teuchos::RCP<const Teuchos::Comm<int> > comm =
> Tpetra::DefaultPlatform::getDefaultPlatform().getComm();
>         size_t myRank = comm->getRank();
>         size_t nProc = comm->getSize();
>         std::ostream &out = (myRank == 0)?std::cout:nullStream;
>         Teuchos::RCP<Teuchos::FancyOStream> fout =
> Teuchos::getFancyOStream(Teuchos::rcpFromRef (out));
>
>         Teuchos::CommandLineProcessor clp;
>
>         int GNEX = 64;
>         clp.setOption("NX", &GNEX, "Number of elements in X-direction.");
>
>         int GNEY = 64;
>         clp.setOption("NY", &GNEY, "Number of elements in Y-direction.");
>
>         int GNEZ = 64;
>         clp.setOption("NZ", &GNEZ, "Number of elements in Z-direction.");
>
>         long long grow = 128;
>         clp.setOption("grow", &grow, "Global row number");
>
>         std::string matrix("original");
>         clp.setOption("matrix", &matrix, "original = A, exported = b");
>
>         clp.recogniseAllOptions(true);
>         clp.throwExceptions(false);
>
>         Teuchos::CommandLineProcessor::EParseCommandLineReturn
> parseReturn = clp.parse(argc, argv);
>         if(parseReturn ==
> Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0;
>         if(parseReturn !=
> Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) return 1;
>
>         const int numGElements = GNEX*GNEY*GNEZ;
>         const int GNNX = GNEX+1, GNNY = GNEY+1, GNNZ= GNEZ+1;
>         const int numGNodes    = GNNX*GNNY*GNNZ;
>         const int numGDof      = GNNX*GNNY*GNNZ;
>         out << "\t\t Number of global elements is : " << numGElements
> << "\n";
>         out << "\t\t Number of global nodes is    : " << numGNodes <<
> "\n";
>
>         Epetra_Map elemZMap(GNEZ, 0, ecomm);
>         long count = GNNZ/nProc;
>         long np; np = GNNZ - (nProc*count);
>         count += (myRank < np)?1:0;
>
>         long long int nnu = count*GNNX*GNNY;
>         LO indexBase = 0;
>         Teuchos::RCP<const Tpetra::Map<LO, LLT> > gmap =
> Teuchos::rcp(new Tpetra::Map<LO,LLT>(-1, nnu, indexBase, comm));
>         int numLNodes = gmap->getNodeNumElements();
>
>         int startOverlapZ = (myRank ==
> 0)?0:(gmap->getGlobalElement(0))-GNNX*GNNY;
>
>         int endOverlapZ = (myRank ==
> nProc-1)?(gmap->getGlobalElement(0)+(count-1)*GNNX*GNNY):(gmap->getGlobalElement(0)+count*GNNX*GNNY);
>
>
>         startOverlapZ /= (GNNX*GNNY); endOverlapZ /= (GNNX*GNNY);
>
>         int startX = 0; int endX = GNNX;
>
>         int startUY = (myRank == 0)?
> startOverlapZ:(startOverlapZ+1);
> // Unique map start index
>         int endUY   = (myRank ==
> nProc-1)?endOverlapZ:(endOverlapZ-1);                                   //
> Unique map end index
>
>         int numLElemZ = elemZMap.NumMyElements();
>         int numLElemX = GNEX, numLElemY = GNEY;
>         int numLElems = numLElemX*numLElemY*numLElemZ;
>         int startElemZ = elemZMap.GID(0);
>         int endElemZ = elemZMap.GID(numLElemZ-1);
>         int startNodeZ = startElemZ;
>         int endNodeZ = endElemZ+1;
>
>         typedef shards::CellTopology    CellTopology;
>         CellTopology
> h8(shards::getCellTopologyData<shards::Hexahedron<8> >() );
>         // Get dimensions
>         int numNodesPerElem = h8.getNodeCount();
>         int spaceDim = h8.getDimension();
>
>         FieldContainer<int> elemNode(numLElems, numNodesPerElem);
>         int lElem = 0;
>         for(int k = startElemZ; k <= endElemZ; k++)
>                 for (int j = 0; j < GNEY; j++)
>                         for (int i = 0; i < GNEX; i++)
>                         {
>                                 elemNode(lElem,0) = (GNNX*GNNY)*k +
> (GNNX)*j + i;
>                                 elemNode(lElem,1) = (GNNX*GNNY)*k +
> (GNNX)*j + i + 1;
>                                 elemNode(lElem,2) = (GNNX*GNNY)*k +
> (GNNX)*(j + 1) + i + 1;
>                                 elemNode(lElem,3) = (GNNX*GNNY)*k +
> (GNNX)*(j + 1) + i;
>                                 elemNode(lElem,4) = (GNNX*GNNY)*(k+1)
> + (GNNX)*j + i;
>                                 elemNode(lElem,5) = (GNNX*GNNY)*(k+1)
> + (GNNX)*j + i + 1;
>                                 elemNode(lElem,6) = (GNNX*GNNY)*(k+1)
> + (GNNX)*(j + 1) + i + 1;
>                                 elemNode(lElem,7) = (GNNX*GNNY)*(k+1)
> + (GNNX)*(j + 1) + i;
>                                 lElem++;
>                         }
>
>         double leftX = 0.0, rightX = 1.0;
>         double leftY = 0.0, rightY = 1.0;
>         double leftZ = 0.0, rightZ = 1.0;
>
>         hx = hy = hz = 1;
>         numLNodes = (numLElemZ+1)*(numLElemY+1)*(numLElemX+1);
>         FieldContainer<double> nodeCoord(numLNodes, spaceDim);
>         FieldContainer<int> nodeOnBoundary(numLNodes);
>
>         int inode = 0;
>
>         for (int k = startNodeZ; k <= endNodeZ; k++)
>                 for (int j = 0; j < GNNY; j++)
>                         for (int i = 0; i < GNNX; i++)
>                         {
>                                 nodeCoord(inode,0) = leftX + (double)i*hx;
>                                 nodeCoord(inode,1) = leftY + (double)j*hy;
>                                 nodeCoord(inode,2) = leftZ +
> (double)k*hz;
>                                 inode++;
>                         }
>
>         DefaultCubatureFactory<double>  cubFactory;
>         int cubDegree = 3;
>         Teuchos::RCP<Cubature<double> > hexCub = cubFactory.create(h8,
> cubDegree);
>
>         int cubDim       = hexCub->getDimension();
>         int numCubPoints = hexCub->getNumPoints();
>
>         FieldContainer<double> cubPoints(numCubPoints, cubDim);
>         FieldContainer<double> cubWeights(numCubPoints);
>
>         hexCub->getCubature(cubPoints, cubWeights);
>
>         Basis_HGRAD_HEX_C1_FEM<double, FieldContainer<double> >
> hexHGradBasis;
>         int numFieldsG = hexHGradBasis.getCardinality();
>
>         FieldContainer<double> hexGVals(numFieldsG, numCubPoints);
>         FieldContainer<double> hexGrads(numFieldsG, numCubPoints,
> spaceDim);
>
>
>         hexHGradBasis.getValues(hexGVals, cubPoints, OPERATOR_VALUE);
>         hexHGradBasis.getValues(hexGrads, cubPoints, OPERATOR_GRAD);
>
>
>         typedef CellTools<double>  CellTools;
>         typedef FunctionSpaceTools fst;
>         int numCells = 1;
>
>         FieldContainer<double> hexNodes(numCells, numNodesPerElem,
> spaceDim);
>
>         FieldContainer<double> hexJacobian(numCells, numCubPoints,
> spaceDim, spaceDim);
>         FieldContainer<double> hexJacobInv(numCells, numCubPoints,
> spaceDim, spaceDim);
>         FieldContainer<double> hexJacobDet(numCells, numCubPoints);
>
>         FieldContainer<double> localStiffMatrix(numCells, numFieldsG,
> numFieldsG);
>         FieldContainer<double> weightedMeasure(numCells, numCubPoints);
>         FieldContainer<double> hexGradsTransformed(numCells,
> numFieldsG, numCubPoints, spaceDim);
>         FieldContainer<double> hexGradsTransformedWeighted(numCells,
> numFieldsG, numCubPoints, spaceDim);
>
>         FieldContainer<double> rhsData(numCells, numCubPoints);
>         FieldContainer<double> localRHS(numCells, numFieldsG);
>         FieldContainer<double> hexGValsTransformed(numCells,
> numFieldsG, numCubPoints);
>         FieldContainer<double> hexGValsTransformedWeighted(numCells,
> numFieldsG, numCubPoints);
>         FieldContainer<double> physCubPoints(numCells, numCubPoints,
> cubDim);
>
>         Teuchos::Array<ST> values(8);
>         Teuchos::Array<long long int> cols(8);
>         Teuchos::RCP<matrix_type> A = rcp(new matrix_type(gmap, 27));
>
>         long long int rowIndex = 0, colIndex = 0, ierr, ix;
>         for(int i = 0; i < lElem; i++)
>         {
>                 for(int j = 0; j < numNodesPerElem; j++)
>                 {
>
>                         ix = (elemNode(i,j)%GNNX);
>                         hexNodes(0,j,0) = ix*hx;
>                         hexNodes(0,j,1) = ((elemNode(i,j)%(GNNX*GNNY)
> - ix)/GNNX)*hy;
>                         hexNodes(0,j,2) = (elemNode(i,j)/(GNNX*GNNY))*hz;
>                 }
>
>                 CellTools::setJacobian(hexJacobian, cubPoints,
> hexNodes, h8);
>                 CellTools::setJacobianInv(hexJacobInv, hexJacobian );
>                 CellTools::setJacobianDet(hexJacobDet, hexJacobian );
>
>                 fst::HGRADtransformGRAD<double>(hexGradsTransformed,
> hexJacobInv, hexGrads);
>
>                 fst::computeCellMeasure<double>(weightedMeasure,
> hexJacobDet, cubWeights);
>
>
> fst::multiplyMeasure<double>(hexGradsTransformedWeighted,
> weightedMeasure, hexGradsTransformed);
>
>                 fst::integrate<double>(localStiffMatrix,
> hexGradsTransformed, hexGradsTransformedWeighted, COMP_BLAS);
>                 for (int row = 0; row < numFieldsG; row++)
>                 {
>                         rowIndex = elemNode(i,row);
>                         for (int col = 0; col < numFieldsG; col++)
>                         {
>                                 cols[col] = elemNode(i,col);
>                                 values[col] = localStiffMatrix(0, row,
> col);
>                         }
>                         if ((hexNodes(0,row,2) == 0) ||
> (hexNodes(0,row,2) == GNEZ))
>                                 std::fill(values.begin(),
> values.end(), 0);
>                         A->insertGlobalValues(rowIndex, cols, values);
>                 }
>
>         }
>
>         LLT drow = 0, maxind = 0, down = GNNX*GNNY, top =
> GNNX*GNNY*(GNNZ-1);
>         LO num_local_nodes;
>         maxind = gmap->getMaxGlobalIndex();
>
>         num_local_nodes = gmap->getNodeNumElements();
>         if ((drow < down) || (maxind >= top))
>         {
>                 for(int i = 0; i < num_local_nodes; i++)
>                 {
>                         drow = gmap->getGlobalElement(i);
>                         if ((drow < down) || (drow >= top))
>                                 A->replaceGlobalValues(drow,
> Teuchos::tuple<long long int>(drow), Teuchos::tuple<ST>(1.0));
>                 }
>         }
>
>         A->globalAssemble();
>         A->fillComplete();
>
>         RCP<Tpetra::Vector<SC, LO, LLT, NO> > x1 = rcp(new
> Tpetra::Vector<SC, LO, LLT, NO> (A->getDomainMap ()));
>         RCP<Tpetra::Vector<SC, LO, LLT, NO> > b1 = rcp(new
> Tpetra::Vector<SC, LO, LLT, NO> (A->getRangeMap ()));
>         x1->putScalar(1.0);
>         A->apply(*x1, *b1);
>         out << "Norm of b1 is " << b1->norm2() << std::endl;
>
>
>         Teuchos::RCP< map_type> gmap2 = rcp (new map_type (numGNodes,
> indexBase, comm, Tpetra::GloballyDistributed));
>         Teuchos::RCP<matrix_type> B = Teuchos::rcp(new matrix_type
> (gmap2, 0));
>         typedef Tpetra::Export<LO, LLT, NO> export_type;
>         export_type exporter (gmap, gmap2);
>         B->doExport (*A, exporter, Tpetra::INSERT);
>         B->fillComplete();
>
>         RCP<Tpetra::Vector<SC, LO, LLT, NO> > x2 = rcp(new
> Tpetra::Vector<SC, LO, LLT, NO> (B->getDomainMap ()));
>         RCP<Tpetra::Vector<SC, LO, LLT, NO> > b2 = rcp(new
> Tpetra::Vector<SC, LO, LLT, NO> (B->getRangeMap ()));
>         x2->putScalar(1.0);
>         B->apply(*x2, *b2);
>         out << "Norm of b2 is " << b2->norm2() << std::endl;
>
>         Teuchos::Array<ST> crsvals;
>         size_t NumEntries = 0;
>         Teuchos::Array<LLT> indices;
>         NumEntries = B->getNumEntriesInGlobalRow(grow);
>         if (NumEntries != Teuchos::OrdinalTraits<size_t>::invalid())
>         {
>                 std::cout << "outputting crs values " << std::endl;
>                 indices.resize(NumEntries); crsvals.resize(NumEntries);
>                 A->getGlobalRowCopy (grow, indices(), crsvals(),
> NumEntries);
>                 for(int i = 0; i < indices.size(); i++)
>                         std::cout << indices[i] << " " << crsvals[i]
> << "\n";
>         }
>
>         Xpetra::UnderlyingLib lib = Xpetra::UseTpetra;
>         Teuchos::RCP<Xpetra::CrsMatrix<ST, LO, LLT, NO, LMO> > mueluM_;
>         if (matrix == "original")
>                 mueluM_ = rcp(new Xpetra::TpetraCrsMatrix<ST, LO, LLT,
> NO, LMO>(A));
>         else
>                 mueluM_ = rcp(new Xpetra::TpetraCrsMatrix<ST, LO, LLT,
> NO, LMO>(B));
>
>         Teuchos::RCP<Xpetra::Matrix <ST, LO, LLT, NO, LMO> > mueluA =
> rcp(new Xpetra::CrsMatrixWrap<ST, LO, LLT, NO, LMO>(mueluM_));
>
>         Teuchos::RCP<MueLu::Hierarchy<ST, LO, LLT, NO, LMO> > H =
> rcp(new MueLu::Hierarchy<ST, LO, LLT, NO, LMO> (mueluA));
>
>         out << "Begin setup of Heirarchy ..." << std::endl;
>
>         H->Setup();
>
>         out << "Finished setup of Hierarchy ..." << std::endl;
>
> }
>
>
> _______________________________________________
> Trilinos-Users mailing list
> Trilinos-Users at software.sandia.gov
> https://software.sandia.gov/mailman/listinfo/trilinos-users


More information about the Trilinos-Users mailing list